From 7f42efacb6422da097fd3c88c0f360be1cea4954 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 6 Jan 2024 12:23:41 +0800 Subject: [PATCH 01/20] SOF-7194: preliminary deepmd support on the web platform --- assets/deepmd/dp_train_se_e2_r.j2.json | 67 ++++++++++++++++++++++++++ assets/deepmd/qe_cp_to_deepmd.j2.py | 23 +++++++++ executables/deepmd/dp_prepare.py.yml | 12 +++++ executables/deepmd/dp_train.yml | 12 +++++ executables/tree.yml | 3 ++ src/js/data/templates.js | 2 +- src/js/data/tree.js | 2 +- templates/deepmd/dp_prepare.py.yml | 5 ++ templates/deepmd/dp_train.yml | 5 ++ templates/templates.yml | 4 ++ 10 files changed, 133 insertions(+), 2 deletions(-) create mode 100644 assets/deepmd/dp_train_se_e2_r.j2.json create mode 100644 assets/deepmd/qe_cp_to_deepmd.j2.py create mode 100644 executables/deepmd/dp_prepare.py.yml create mode 100644 executables/deepmd/dp_train.yml create mode 100644 templates/deepmd/dp_prepare.py.yml create mode 100644 templates/deepmd/dp_train.yml diff --git a/assets/deepmd/dp_train_se_e2_r.j2.json b/assets/deepmd/dp_train_se_e2_r.j2.json new file mode 100644 index 00000000..884b21f2 --- /dev/null +++ b/assets/deepmd/dp_train_se_e2_r.j2.json @@ -0,0 +1,67 @@ +{ + "model": { + "type_map": [ + "O", + "H" + ], + "descriptor": { + "type": "se_e2_r", + "sel": [ + 23, + 46 + ], + "rcut_smth": 0.50, + "rcut": 5.00, + "neuron": [ + 5, + 5, + 5 + ], + "resnet_dt": false, + "seed": 1 + }, + "fitting_net": { + "neuron": [ + 60, + 60, + 60 + ], + "resnet_dt": true, + "seed": 1 + } + }, + "learning_rate": { + "type": "exp", + "decay_steps": 1000, + "start_lr": 0.005, + "stop_lr": 3.51e-6 + }, + "loss": { + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0 + }, + "training": { + "training_data": { + "systems": [ + "./training/" + ], + "batch_size": "auto" + }, + "validation_data": { + "systems": [ + "./validation/" + ], + "batch_size": "auto" + }, + "numb_steps": 301, + "seed": 1, + "disp_file": "lcurve.out", + "disp_freq": 10, + "numb_test": 1, + "save_freq": 100 + } +} diff --git a/assets/deepmd/qe_cp_to_deepmd.j2.py b/assets/deepmd/qe_cp_to_deepmd.j2.py new file mode 100644 index 00000000..50bc1130 --- /dev/null +++ b/assets/deepmd/qe_cp_to_deepmd.j2.py @@ -0,0 +1,23 @@ +import dpdata +import numpy as np + +# https://docs.deepmodeling.com/projects/dpdata/en/master/formats/QECPTrajFormat.html +data = dpdata.LabeledSystem("generate_dft_data", fmt="qe/cp/traj") +print("Dataset contains total {0} frames".format(len(data))) + +# randomly choose 20% index for validation_data +size = len(data) +size_validation = round(size * 0.2) + +index_validation = np.random.choice(size, size=size_validation, replace=False) +index_training = list(set(range(size)) - set(index_validation)) + +data_training = data.sub_system(index_training) +data_validation = data.sub_system(index_validation) + +print("Using {0} frames as training set".format(len(data_training))) +print("Using {0} frames as validation set".format(len(data_validation))) + +# save training and validation sets +data_training.to_deepmd_npy("./training") +data_validation.to_deepmd_npy("./validation") diff --git a/executables/deepmd/dp_prepare.py.yml b/executables/deepmd/dp_prepare.py.yml new file mode 100644 index 00000000..becb5ca5 --- /dev/null +++ b/executables/deepmd/dp_prepare.py.yml @@ -0,0 +1,12 @@ +monitors: + - standard_output +results: [] +flavors: + qe_cp_to_deepmd: + input: + - name: 
qe_cp_to_deepmd.py + results: [] + monitors: + - standard_output + applicationName: deepmd + executableName: 'dp_prepare' # python3 qe_cp_to_deepmd.py diff --git a/executables/deepmd/dp_train.yml b/executables/deepmd/dp_train.yml new file mode 100644 index 00000000..79a646ec --- /dev/null +++ b/executables/deepmd/dp_train.yml @@ -0,0 +1,12 @@ +monitors: + - standard_output +results: [] +flavors: + dp_train_se_e2_r: + input: + - name: dp_train_se_e2_r.json + results: [] + monitors: + - standard_output + applicationName: deepmd + executableName: 'dp_train' # dp train dp_train_se_e2_r.json && dp freeze -o graph.pb diff --git a/executables/tree.yml b/executables/tree.yml index d53ff64d..145d71bd 100644 --- a/executables/tree.yml +++ b/executables/tree.yml @@ -1,3 +1,6 @@ +deepmd: + dp_prepare: !include 'executables/deepmd/dp_prepare.py.yml' + dp_train: !include 'executables/deepmd/dp_train.yml' espresso: average.x: !include 'executables/espresso/average.x.yml' bands.x: !include 'executables/espresso/bands.x.yml' diff --git a/src/js/data/templates.js b/src/js/data/templates.js index 029f136f..3075e391 100644 --- a/src/js/data/templates.js +++ b/src/js/data/templates.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {allTemplates: [{"content":"1\npp.dat\n1.0\n3000\n3\n3.0000\n","name":"average.in","contextProviders":[],"applicationName":"espresso","executableName":"average.x"},{"content":"&BANDS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n filband = {% raw %}'{{ JOB_WORK_DIR }}/bands.dat'{% endraw %}\n no_overlap = .true.\n/\n\n","name":"bands.in","contextProviders":[],"applicationName":"espresso","executableName":"bands.x"},{"content":"&DOS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n/\n\n","name":"dos.in","contextProviders":[],"applicationName":"espresso","executableName":"dos.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"dynmat_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"dynmat.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! truncation (used for both correlation and exchange)\n truncation = '2d'\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of the Coulomb solver\n thres_coul = 1.0d-2\n\n ! configuration of W in the convolution\n model_coul = 'godby-needs'\n max_freq_coul = 120\n num_freq_coul = 35\n\n ! configuration of the Green solver\n thres_green = 1.0d-3\n max_iter_green = 300\n\n ! configuration for the correlation self energy\n ecut_corr = 5.0\n max_freq_corr = 100.0\n num_freq_corr = 11\n\n ! configuration for the exchange self energy\n ecut_exch = 20.0\n\n ! 
configuration for the output\n eta = 0.1\n min_freq_wind = -30.0\n max_freq_wind = 30.0\n num_freq_wind = 601\n/\n\n&gw_output\n/\n\nFREQUENCIES\n2\n 0.0 0.0\n 0.0 10.0\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_plasmon_pole.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of W in the convolution\n max_freq_coul = 200\n num_freq_coul = 51\n\n ! configuration for the correlation self energy\n ecut_corr = 6.0\n\n ! configuration for the exchange self energy\n ecut_exch = 15.0\n/\n\n&gw_output\n/\n\nFREQUENCIES\n35\n 0.0 0.0\n 0.0 0.3\n 0.0 0.9\n 0.0 1.8\n 0.0 3.0\n 0.0 4.5\n 0.0 6.3\n 0.0 8.4\n 0.0 10.8\n 0.0 13.5\n 0.0 16.5\n 0.0 19.8\n 0.0 23.4\n 0.0 27.3\n 0.0 31.5\n 0.0 36.0\n 0.0 40.8\n 0.0 45.9\n 0.0 51.3\n 0.0 57.0\n 0.0 63.0\n 0.0 69.3\n 0.0 75.9\n 0.0 82.8\n 0.0 90.0\n 0.0 97.5\n 0.0 105.3\n 0.0 113.4\n 0.0 121.8\n 0.0 130.5\n 0.0 139.5\n 0.0 148.8\n 0.0 158.4\n 0.0 168.3\n 0.0 178.5\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_full_frequency.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"matdyn_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc ='force_constants.fc'\n flfrq ='frequencies.freq'\n flvec ='normal_modes.out'\n q_in_band_form = .true.\n /\n{{ipath.length}}\n{% for point in ipath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"matdyn_path.in","contextProviders":[{"name":"IPathFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"BEGIN\nBEGIN_PATH_INPUT\n&PATH\n restart_mode = 'from_scratch'\n string_method = 'neb',\n nstep_path = 50,\n ds = 2.D0,\n opt_scheme = \"broyden\",\n num_of_images = {{ 2 + (input.INTERMEDIATE_IMAGES.length || neb.nImages) }},\n k_max = 0.3D0,\n k_min = 0.2D0,\n CI_scheme = \"auto\",\n path_thr = 0.1D0,\n/\nEND_PATH_INPUT\nBEGIN_ENGINE_INPUT\n&CONTROL\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.03\n nspin = 
2\n starting_magnetization = 0.5\n/\n&ELECTRONS\n conv_thr = 1.D-8\n mixing_beta = 0.3\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nBEGIN_POSITIONS\nFIRST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.FIRST_IMAGE }}\n{%- for IMAGE in input.INTERMEDIATE_IMAGES %}\nINTERMEDIATE_IMAGE\nATOMIC_POSITIONS crystal\n{{ IMAGE }}\n{%- endfor %}\nLAST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.LAST_IMAGE }}\nEND_POSITIONS\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nEND_ENGINE_INPUT\nEND\n","name":"neb.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"NEBFormDataManager"},{"name":"QENEBInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"neb.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n ldisp = .true.\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n{% for point in qpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor %}\n{% endfor %}\n","name":"ph_path.in","contextProviders":[{"name":"QPathFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n0 0 0\n","name":"ph_gamma.in","contextProviders":[],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .false.\n start_irr = 0\n last_irr = 0\n ldisp = .true.\n fildyn = 'dyn0'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_init_qpoints.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .true.\n ldisp = .true.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid_restart.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18\n ldisp = .true.\n {% raw -%}\n start_q = {{MAP_DATA.qpoint}}\n last_q = {{MAP_DATA.qpoint}}\n start_irr = {{MAP_DATA.irr}}\n last_irr= {{MAP_DATA.irr}}\n {%- endraw %}\n recover = .true.\n fildyn = 'dyn'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_SCRATCH_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_single_irr_qpt.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 0\n/\n&PLOT\n iflag = 3\n output_format = 5\n fileout 
='density.xsf'\n/\n\n","name":"pp_density.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 11\n/\n","name":"pp_electrostatic_potential.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&PROJWFC\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n deltaE = 0.05\n/\n","name":"projwfc.in","contextProviders":[],"applicationName":"espresso","executableName":"projwfc.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n input_dft = 'hse',\n {% for d in qgrid.dimensions -%}\n nqx{{loop.index}} = {{d}}\n {% endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal\n{{ '{{' }} {{ explicitKPath.length }} {% raw %} + KPOINTS|length }} {% endraw %}\n{%- raw %}\n{% for point in KPOINTS -%}\n {% for d in point.coordinates %}{{ \"%14.9f\"|format(d) }} {% endfor -%}{{ point.weight }}\n{% endfor %}\n{% endraw -%}\n{% for point in explicitKPath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}0.0000001\n{% endfor 
%}\n","name":"pw_scf_bands_hse.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPathFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw 
%}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% raw %}{{PARAMETER | default('1')}} {{PARAMETER | default('1')}} {{PARAMETER | default('1')}} 0 0 0{% endraw %}\n","name":"pw_scf_kpt_conv.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'nscf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n{%- if subworkflowContext.NO_SYMMETRY_NO_INVERSION %}\n nosym = .true.\n noinv = .true.\n{%- endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_nscf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n nstep = 50\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'vc-relax'\n title = ''\n verbosity = 
'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_vc_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'bands'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal_b\n{{kpath.length}}\n{% for point in kpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"pw_bands.in","contextProviders":[{"name":"KPathFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n ecutfock = 100\n occupations = 'smearing'\n degauss = 0.005\n input_dft='hse',\n nqx1 = {% if kgrid.dimensions[0]%2 == 0 %}{{kgrid.dimensions[0]/2}}{% else %}{{(kgrid.dimensions[0]+1)/2}}{% endif %}, nqx2 = {% if kgrid.dimensions[1]%2 == 0 %}{{kgrid.dimensions[1]/2}}{% else %}{{(kgrid.dimensions[1]+1)/2}}{% endif %}, nqx3 = {% if 
kgrid.dimensions[2]%2 == 0 %}{{kgrid.dimensions[2]/2}}{% else %}{{(kgrid.dimensions[2]+1)/2}}{% endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{% if d%2 == 0 %}{{d}} {% else %}{{d+1}} {% endif %}{% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf_hse.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n","name":"pw_scf_dft_u.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ 
row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n{% for row in hubbard_v -%}\nV {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.atomicSpecies2 }}-{{ row.atomicOrbital2 }} {{ row.siteIndex }} {{ row.siteIndex2 }} {{ row.hubbardVValue }}\n{% endfor -%}\n","name":"pw_scf_dft_v.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"},{"name":"HubbardVContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_j -%}\n{{ row.paramType }} {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.value }}\n{% endfor -%}\n","name":"pw_scf_dft_j.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardJContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n lda_plus_u = .true.\n lda_plus_u_kind = 0\n U_projection_type = 'ortho-atomic'\n {%- for row in hubbard_legacy %}\n Hubbard_U({{ row.atomicSpeciesIndex }}) = {{ row.hubbardUValue }}\n {%- endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor 
%}\n","name":"pw_scf_dft_u_legacy.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardContextManagerLegacy"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&INPUT\n fildyn = 'dyn'\n zasr = 'simple'\n flfrc = 'force_constants.fc'\n/\n","name":"q2r.in","contextProviders":[],"applicationName":"espresso","executableName":"q2r.x"},{"content":"&inputhp\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {%- for d in qgrid.dimensions %}\n nq{{ loop.index }} = {{ d }}\n {%- endfor %}\n/\n","name":"hp.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"hp.x"},{"content":"&inputpp\n calculation = \"eps\"\n prefix = \"__prefix__\"\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n\n&energy_grid\n smeartype = \"gauss\"\n intersmear = 0.2\n intrasmear = 0.0\n wmin = 0.0\n wmax = 30.0\n nw = 500\n shift = 0.0\n/\n\n","name":"epsilon.in","contextProviders":[],"applicationName":"espresso","executableName":"epsilon.x"},{"content":"# ------------------------------------------------------------------------------- #\n# #\n# Example JupyterLab requirements #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. A Python virtual environment is created #\n# - in /scratch/$USER/$JOB_ID (for build: 'Default') #\n# - in /export/share/python/ (for build: 'with-pre-installed-packages') #\n# 3. This list is used to populate a Python virtual environment #\n# 4. JupyterLab is started #\n# #\n# For more information visit: #\n# - https://jupyterlab.readthedocs.io/en/stable/index.html #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Note: With the JupyterLab build 'with-pre-installed-packages', packages #\n# cannot be added during the notebook runtime. #\n# #\n# ------------------------------------------------------------------------------- #\n\njupyterlab==3.0.3\nnotebook>=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. 
#\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. #\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. 
The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n 
ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. #\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(target_range)),\n max(max(predictions_range), max(predictions_range)))\n plt.xlim = (limits[0], limits[1])\n plt.ylim = (limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train an test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR <=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. 
#\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. #\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. 
The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n 
ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. #\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(predictions_range)),\n max(max(target_range), max(predictions_range)))\n plt.xlim(limits[0], limits[1])\n plt.ylim(limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train and test set, #\n# and color by class.
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR < Date: Sat, 6 Jan 2024 12:28:44 +0800 Subject: [PATCH 02/20] SOF-7194: add tests --- tests/test.js | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test.js b/tests/test.js index 8d059fa8..69b95321 100644 --- a/tests/test.js +++ b/tests/test.js @@ -81,6 +81,18 @@ describe("assets for all executables", () => { templates = getAllAppTemplates(); }); + it("exists at least 1 asset for each tree entry for deepmd tree", () => { + const tree = APP_TREE.deepmd; + + Object.keys(tree).forEach((treeItemName) => { + const treeItemTemplates = templates.filter( + (template) => template.executableName === treeItemName, + ); + + assert(treeItemTemplates.length > 0); + }); + }); + it("exists at least 1 asset for each tree entry for espresso tree", () => { const tree = APP_TREE.espresso; From f017cc250d456a7c904aae5d5bb89195812e1d5c Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 6 Jan 2024 13:52:49 +0800 Subject: [PATCH 03/20] SOF-7194: include deepmd in the applications data --- applications/application_data.yml | 1 + applications/deepmd.yml | 7 +++++++ src/js/data/application_data.js | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 applications/deepmd.yml diff --git a/applications/application_data.yml b/applications/application_data.yml index b43c59bf..9dbed3f0 100644 --- 
a/applications/application_data.yml +++ b/applications/application_data.yml @@ -1,3 +1,4 @@ +deepmd: !include 'applications/deepmd.yml' espresso: !include 'applications/espresso.yml' exabyteml: !include 'applications/exabyteml.yml' jupyterLab: !include 'applications/jupyterLab.yml' diff --git a/applications/deepmd.yml b/applications/deepmd.yml new file mode 100644 index 00000000..d6b058c8 --- /dev/null +++ b/applications/deepmd.yml @@ -0,0 +1,7 @@ +name: deepmd +shortName: deepmd +summary: DeePMD +defaultVersion: '2.0.2' +versions: + - version: '2.0.2' + isDefault: true diff --git a/src/js/data/application_data.js b/src/js/data/application_data.js index e41042a0..c44a7d09 100644 --- a/src/js/data/application_data.js +++ b/src/js/data/application_data.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {applicationData: {"espresso":{"name":"espresso","shortName":"qe","summary":"Quantum Espresso","defaultVersion":"6.3","versions":[{"version":"5.2.1","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"5.2.1","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true},{"version":"5.2.1","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"5.4.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"5.4.0","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true},{"version":"5.4.0","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"6.0.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.0.0","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true},{"version":"6.0.0","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"6.3","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.3","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true},{"version":"6.3","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"6.4.1","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.5.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.6.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.7.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.8.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"7.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"7.0","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true},{"version":"7.0","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"7.2","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"7.2","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"7.2","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true}]},"exabyteml":{"name":"exabyteml","shortName":"ml","summary":"Exabyte Machine Learning Engine","defaultVersion":"0.2.0","versions":[{"version":"0.2.0","isDefault":true}]},"jupyterLab":{"name":"jupyterLab","shortName":"jl","summary":"JupyterLab","defaultVersion":"3.0.3","versions":[{"version":"3.0.3","isDefault":true},{"version":"3.0.3","isDefault":false,"build":"with-pre-installed-packages"}]},"nwchem":{"name":"nwchem","shortName":"nwchem","summary":"NWChem","defaultVersion":"7.0.2","versions":[{"version":"7.0.2","isDefault":true},{"version":"6.6"}]},"python":{"name":"python","shortName":"py","summary":"Python 
Script","defaultVersion":"3.8.6","versions":[{"version":"3.6.12"},{"version":"3.7.9"},{"version":"3.8.6","isDefault":true},{"version":"3.9.1"},{"version":"anaconda3-5.2.0"}]},"shell":{"name":"shell","shortName":"sh","summary":"Shell Script","defaultVersion":"4.2.46","versions":[{"version":"4.2.46","isDefault":true}]},"vasp":{"name":"vasp","shortName":"vasp","summary":"Vienna Ab-initio Simulation Package","defaultVersion":"5.3.5","isLicensed":true,"versions":[{"version":"5.3.5","isDefault":true},{"version":"5.3.5","isDefault":false,"build":"Non-collinear"},{"version":"5.3.5","isDefault":false,"build":"VTST"},{"version":"5.4.4","isDefault":true},{"version":"5.4.4","isDefault":false,"build":"Gamma"},{"version":"5.4.4","isDefault":false,"build":"Non-collinear"},{"version":"5.4.4","isDefault":false,"build":"VTST"},{"version":"5.4.4","isDefault":false,"build":"VTST-Gamma"},{"version":"5.4.4","isDefault":false,"build":"VTST-Non-collinear"}]}}} +module.exports = {applicationData: {"deepmd":{"name":"deepmd","shortName":"deepmd","summary":"DeePMD","defaultVersion":"2.0.2","versions":[{"version":"2.0.2","isDefault":true}]},"espresso":{"name":"espresso","shortName":"qe","summary":"Quantum Espresso","defaultVersion":"6.3","versions":[{"version":"5.2.1","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"5.2.1","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true},{"version":"5.2.1","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"5.4.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"5.4.0","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true},{"version":"5.4.0","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"6.0.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.0.0","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true},{"version":"6.0.0","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"6.3","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.3","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true},{"version":"6.3","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"6.4.1","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.5.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.6.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.7.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"6.8.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"7.0","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"7.0","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true},{"version":"7.0","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"7.2","isDefault":true,"hasAdvancedComputeOptions":true},{"version":"7.2","isDefault":false,"build":"Intel","hasAdvancedComputeOptions":true},{"version":"7.2","isDefault":false,"build":"GNU","hasAdvancedComputeOptions":true}]},"exabyteml":{"name":"exabyteml","shortName":"ml","summary":"Exabyte Machine Learning 
Engine","defaultVersion":"0.2.0","versions":[{"version":"0.2.0","isDefault":true}]},"jupyterLab":{"name":"jupyterLab","shortName":"jl","summary":"JupyterLab","defaultVersion":"3.0.3","versions":[{"version":"3.0.3","isDefault":true},{"version":"3.0.3","isDefault":false,"build":"with-pre-installed-packages"}]},"nwchem":{"name":"nwchem","shortName":"nwchem","summary":"NWChem","defaultVersion":"7.0.2","versions":[{"version":"7.0.2","isDefault":true},{"version":"6.6"}]},"python":{"name":"python","shortName":"py","summary":"Python Script","defaultVersion":"3.8.6","versions":[{"version":"3.6.12"},{"version":"3.7.9"},{"version":"3.8.6","isDefault":true},{"version":"3.9.1"},{"version":"anaconda3-5.2.0"}]},"shell":{"name":"shell","shortName":"sh","summary":"Shell Script","defaultVersion":"4.2.46","versions":[{"version":"4.2.46","isDefault":true}]},"vasp":{"name":"vasp","shortName":"vasp","summary":"Vienna Ab-initio Simulation Package","defaultVersion":"5.3.5","isLicensed":true,"versions":[{"version":"5.3.5","isDefault":true},{"version":"5.3.5","isDefault":false,"build":"Non-collinear"},{"version":"5.3.5","isDefault":false,"build":"VTST"},{"version":"5.4.4","isDefault":true},{"version":"5.4.4","isDefault":false,"build":"Gamma"},{"version":"5.4.4","isDefault":false,"build":"Non-collinear"},{"version":"5.4.4","isDefault":false,"build":"VTST"},{"version":"5.4.4","isDefault":false,"build":"VTST-Gamma"},{"version":"5.4.4","isDefault":false,"build":"VTST-Non-collinear"}]}}} From 9c09052fdb8f2845a2dde3d1dfed690a5d6e8c3d Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 6 Jan 2024 16:51:13 +0800 Subject: [PATCH 04/20] SOF-7194: set default executable for deepmd --- executables/deepmd/{dp_prepare.py.yml => dp_prepare.yml} | 1 + executables/deepmd/dp_train.yml | 1 + executables/tree.yml | 2 +- src/js/data/tree.js | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) rename executables/deepmd/{dp_prepare.py.yml => dp_prepare.yml} (94%) diff --git a/executables/deepmd/dp_prepare.py.yml b/executables/deepmd/dp_prepare.yml similarity index 94% rename from executables/deepmd/dp_prepare.py.yml rename to executables/deepmd/dp_prepare.yml index becb5ca5..93bec0f0 100644 --- a/executables/deepmd/dp_prepare.py.yml +++ b/executables/deepmd/dp_prepare.yml @@ -1,3 +1,4 @@ +isDefault: true monitors: - standard_output results: [] diff --git a/executables/deepmd/dp_train.yml b/executables/deepmd/dp_train.yml index 79a646ec..21e40efd 100644 --- a/executables/deepmd/dp_train.yml +++ b/executables/deepmd/dp_train.yml @@ -1,3 +1,4 @@ +isDefault: false monitors: - standard_output results: [] diff --git a/executables/tree.yml b/executables/tree.yml index 145d71bd..66f8f5df 100644 --- a/executables/tree.yml +++ b/executables/tree.yml @@ -1,5 +1,5 @@ deepmd: - dp_prepare: !include 'executables/deepmd/dp_prepare.py.yml' + dp_prepare: !include 'executables/deepmd/dp_prepare.yml' dp_train: !include 'executables/deepmd/dp_train.yml' espresso: average.x: !include 'executables/espresso/average.x.yml' diff --git a/src/js/data/tree.js b/src/js/data/tree.js index 60acec8b..06621153 100644 --- a/src/js/data/tree.js +++ b/src/js/data/tree.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {applicationTree: 
{"deepmd":{"dp_prepare":{"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_prepare"}}},"dp_train":{"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.josn"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_train"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input":[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"matdyn.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_init_qpoints":{"input":[{"name":"ph_init_qpoints.
in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reaction_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions","total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_scf_hse.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legacy":{"input":[{"name":"pw_scf_dft_u_legacy.in"}],"res
ults":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors":["standard_output"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationName":"jupyterLab","executableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"score"}}},"train":{"i
sDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test_split_sklearn.py","templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":
["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sklearn":{"input":[{"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest
_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espresso_pw_scf.sh"}],"monitors":["standard_output"],"appli
cationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"n
ame":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}} +module.exports = {applicationTree: 
{"deepmd":{"dp_prepare":{"isDefault":true,"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_prepare"}}},"dp_train":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.josn"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_train"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input":[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"matdyn.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_init_qpoints":
{"input":[{"name":"ph_init_qpoints.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reaction_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions","total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_scf_hse.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legacy":{"input":[{"na
me":"pw_scf_dft_u_legacy.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors":["standard_output"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationName":"jupyterLab","executableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","exe
cutableName":"score"}}},"train":{"isDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test_split_sklearn.py","templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt","templateName":"p
yml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sklearn":{"input":[{"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":
"python"},"pyml:model:random_forest_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espresso_pw_scf.sh"}],"m
onitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPO
INTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}} From fea0e32951daad76409728406beba93e8c7d51b3 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 6 Jan 2024 19:27:12 +0800 Subject: [PATCH 05/20] SOF-7194: include deepmd in allApplications export --- executables/deepmd/dp_prepare.yml | 2 +- executables/deepmd/dp_train.yml | 2 +- src/js/index.js | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/executables/deepmd/dp_prepare.yml b/executables/deepmd/dp_prepare.yml index 93bec0f0..815834da 100644 --- a/executables/deepmd/dp_prepare.yml +++ b/executables/deepmd/dp_prepare.yml @@ -10,4 +10,4 @@ flavors: monitors: - standard_output applicationName: deepmd - executableName: 'dp_prepare' # python3 qe_cp_to_deepmd.py + executableName: dp_prepare # python3 qe_cp_to_deepmd.py diff --git a/executables/deepmd/dp_train.yml 
b/executables/deepmd/dp_train.yml
index 21e40efd..e0d764d4 100644
--- a/executables/deepmd/dp_train.yml
+++ b/executables/deepmd/dp_train.yml
@@ -10,4 +10,4 @@ flavors:
     monitors:
       - standard_output
     applicationName: deepmd
-    executableName: 'dp_train' # dp train dp_train_se_e2_r.josn && dp freeze -o graph.pb
+    executableName: dp_train # dp train dp_train_se_e2_r.josn && dp freeze -o graph.pb
diff --git a/src/js/index.js b/src/js/index.js
index f00ab90d..4d3f25bb 100644
--- a/src/js/index.js
+++ b/src/js/index.js
@@ -11,6 +11,7 @@ export const allApplications = [
     "espresso",
     "jupyterLab",
     "exabyteml",
+    "deepmd",
     "nwchem",
     "python",
     "shell",

From e0277c7d648178a4d2b90c1af5343aa3a0a0c279 Mon Sep 17 00:00:00 2001
From: Pranab Das <31024886+pranabdas@users.noreply.github.com>
Date: Sat, 6 Jan 2024 22:34:26 +0800
Subject: [PATCH 06/20] SOF-7194: fix typo in template name

---
 executables/deepmd/dp_prepare.yml | 1 +
 executables/deepmd/dp_train.yml   | 4 ++--
 src/js/data/tree.js               | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/executables/deepmd/dp_prepare.yml b/executables/deepmd/dp_prepare.yml
index 815834da..0fbf91ad 100644
--- a/executables/deepmd/dp_prepare.yml
+++ b/executables/deepmd/dp_prepare.yml
@@ -4,6 +4,7 @@ monitors:
 results: []
 flavors:
   qe_cp_to_deepmd:
+    isDefault: true
     input:
       - name: qe_cp_to_deepmd.py
     results: []
diff --git a/executables/deepmd/dp_train.yml b/executables/deepmd/dp_train.yml
index e0d764d4..d2579866 100644
--- a/executables/deepmd/dp_train.yml
+++ b/executables/deepmd/dp_train.yml
@@ -5,9 +5,9 @@ flavors:
   dp_train_se_e2_r:
     input:
-      - name: dp_train_se_e2_r.josn
+      - name: dp_train_se_e2_r.json
     results: []
     monitors:
       - standard_output
     applicationName: deepmd
-    executableName: dp_train # dp train dp_train_se_e2_r.josn && dp freeze -o graph.pb
+    executableName: dp_train # dp train dp_train_se_e2_r.json && dp freeze -o graph.pb
diff --git a/src/js/data/tree.js b/src/js/data/tree.js
index 06621153..9ead3013 100644
--- a/src/js/data/tree.js
+++ b/src/js/data/tree.js
@@ -1,2 +1,2 @@
 /* eslint-disable */
-module.exports = {applicationTree: 
{"deepmd":{"dp_prepare":{"isDefault":true,"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_prepare"}}},"dp_train":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.josn"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_train"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input":[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"matdyn.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_init_qpoints":
{"input":[{"name":"ph_init_qpoints.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reaction_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions","total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_scf_hse.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legacy":{"input":[{"na
me":"pw_scf_dft_u_legacy.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors":["standard_output"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationName":"jupyterLab","executableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","exe
cutableName":"score"}}},"train":{"isDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test_split_sklearn.py","templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt","templateName":"p
yml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sklearn":{"input":[{"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":
"python"},"pyml:model:random_forest_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espresso_pw_scf.sh"}],"m
onitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPO
INTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}} +module.exports = {applicationTree: 
{"deepmd":{"dp_prepare":{"isDefault":true,"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"isDefault":true,"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_prepare"}}},"dp_train":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.json"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_train"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input":[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"matdyn.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"
ph_init_qpoints":{"input":[{"name":"ph_init_qpoints.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reaction_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions","total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_scf_hse.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legac
y":{"input":[{"name":"pw_scf_dft_u_legacy.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors":["standard_output"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationName":"jupyterLab","executableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName"
:"exabyteml","executableName":"score"}}},"train":{"isDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test_split_sklearn.py","templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt",
"templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sklearn":{"input":[{"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python",
"executableName":"python"},"pyml:model:random_forest_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espress
o_pw_scf.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_REL
AX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}} From 692528900b11aed06c402891050f4c1ba150eeab Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Tue, 9 Jan 2024 07:53:49 +0800 Subject: [PATCH 07/20] SOF-7194: add lammps executable and templates --- assets/deepmd/in.lammps | 25 +++++ assets/deepmd/structure.lmp | 202 ++++++++++++++++++++++++++++++++++ executables/deepmd/dp_lmp.yml | 15 +++ templates/deepmd/dp_lmp.yml | 11 ++ 4 files changed, 253 insertions(+) create mode 100644 assets/deepmd/in.lammps create mode 100644 assets/deepmd/structure.lmp create mode 100644 executables/deepmd/dp_lmp.yml create mode 100644 templates/deepmd/dp_lmp.yml diff --git a/assets/deepmd/in.lammps b/assets/deepmd/in.lammps new file mode 100644 index 00000000..01aa2ce2 --- /dev/null +++ b/assets/deepmd/in.lammps @@ -0,0 +1,25 @@ +# LAMMPS input, deepmd-kit version. 
Built from bulk water calculation. + +units metal +boundary p p p +atom_style atomic + +neighbor 2.0 bin +neigh_modify every 10 delay 0 check no + +read_data structure.lmp +mass 1 16 +mass 2 2 + +pair_style deepmd ./graph.pb +pair_coeff * * + +velocity all create 330.0 23456789 + +fix 1 all nvt temp 330.0 330.0 0.5 +timestep 0.0005 +thermo_style custom step pe ke etotal temp press vol +thermo 100 +dump 1 all custom 100 structure.dump id type x y z + +run 100 diff --git a/assets/deepmd/structure.lmp b/assets/deepmd/structure.lmp new file mode 100644 index 00000000..8bd1def3 --- /dev/null +++ b/assets/deepmd/structure.lmp @@ -0,0 +1,202 @@ +# LAMMPS data +192 atoms +2 atom types +0.0 12.44470 xlo xhi +0.0 12.44470 ylo yhi +0.0 12.44470 zlo zhi +0.0 0.0 0.0 xy xz yz + +Atoms # metal + +1 1 11.83 2.56 2.18 +2 1 5.36 6 1.81 +3 1 12.17 7.34 2.71 +4 1 3.38 4.28 4.84 +5 1 4.54 11.73 11.9 +6 1 7.34 0.18 2.18 +7 1 5.73 9.9 10.31 +8 1 5.32 11.19 1.83 +9 1 9.39 11.14 1.1 +10 1 12.06 0.64 8.92 +11 1 5.01 7.72 11.74 +12 1 3.64 3.19 1.08 +13 1 11.88 10.9 4.7 +14 1 3.88 8.05 2.51 +15 1 9.82 9.16 10.18 +16 1 11 5.31 5.17 +17 1 0.97 4.12 0.84 +18 1 1.24 5.99 4.76 +19 1 10.1 9.09 3.32 +20 1 8.39 5.72 10.39 +21 1 8.06 4.72 1.3 +22 1 3.23 3.34 8.26 +23 1 5.02 6.21 7.29 +24 1 3.94 10.55 3.95 +25 1 6.25 4.56 4.21 +26 1 12.18 2.35 11.11 +27 1 6.76 8.57 4.07 +28 1 5.87 5.84 9.79 +29 1 2.18 6.91 6.99 +30 1 10.89 7.69 6.78 +31 1 8.03 4.28 6.57 +32 1 5.29 1.92 11.56 +33 1 1.53 12.16 3.45 +34 1 2.04 5.49 9.71 +35 1 6.66 9.05 1.21 +36 1 11.75 6.18 11.01 +37 1 6.11 3.07 7.96 +38 1 8.22 1.38 9.43 +39 1 1.31 2.02 7.09 +40 1 9.48 2.11 5.92 +41 1 9.33 3.34 10.97 +42 1 1.31 8.49 0.81 +43 1 11.25 3.48 7.51 +44 1 11.7 0.33 0.6 +45 1 7.63 2.04 0.28 +46 1 8.86 9.52 7.4 +47 1 2.01 11.54 6.89 +48 1 8.02 11.27 4.66 +49 1 5.82 9.18 7.71 +50 1 8.11 11.19 11.22 +51 1 5.94 0.84 5.91 +52 1 1.88 11.66 0.64 +53 1 4.66 11.28 6.73 +54 1 10.32 6.26 0.89 +55 1 2.01 11.07 10.33 +56 1 5.36 2.06 3.14 +57 1 8.56 7.59 12.17 +58 1 10.51 5.81 8.66 +59 1 2.37 6.23 12.1 +60 1 0.3 8.77 10.05 +61 1 9.47 12.13 7.57 +62 1 1.41 2.32 4.17 +63 1 9.69 3.69 3.56 +64 1 0.75 9.22 7.39 +65 2 11.09 2.87 2.74 +66 2 5.51 5.51 2.6 +67 2 0.24 6.8 3.29 +68 2 4.28 4.19 4.46 +69 2 3.63 11.56 11.76 +70 2 6.53 12.07 1.99 +71 2 5.37 9.14 10.85 +72 2 5.74 10.31 1.64 +73 2 8.71 11.07 0.41 +74 2 0.27 1.04 8.27 +75 2 5.46 7.08 11.15 +76 2 3.84 4.17 1.06 +77 2 11.55 10.19 4.15 +78 2 3 8.17 2.14 +79 2 9.53 10.04 10.56 +80 2 11.18 4.84 6.05 +81 2 0.68 3.76 12.44 +82 2 0.43 5.51 4.88 +83 2 9.51 9.48 3.94 +84 2 9.08 5.89 9.68 +85 2 7.92 3.83 0.8 +86 2 4.23 3.22 8.21 +87 2 5.27 5.9 8.19 +88 2 4.44 10.9 3.14 +89 2 6.93 4.56 4.89 +90 2 11.88 1.61 11.75 +91 2 6.79 8.54 3.09 +92 2 5.62 4.99 10.13 +93 2 3.09 6.66 6.98 +94 2 11.73 8.22 6.86 +95 2 8.4 3.45 6.18 +96 2 4.64 2.37 12.16 +97 2 0.74 11.69 3.86 +98 2 1.1 5.3 9.72 +99 2 7.54 9.11 0.83 +100 2 11.36 6.2 11.9 +101 2 5.85 2.21 7.53 +102 2 8.81 2.01 9.89 +103 2 2.17 2.43 7.46 +104 2 10.1 2.53 6.55 +105 2 9.07 4.29 10.91 +106 2 0.64 8.13 1.45 +107 2 11.12 4.2 8.14 +108 2 11.03 12.03 0.58 +109 2 6.83 1.84 12.19 +110 2 9.13 10.45 7.25 +111 2 1.76 12.44 6.8 +112 2 7.37 11.88 5.08 +113 2 5.59 8.29 7.44 +114 2 8.14 12.12 10.86 +115 2 5.48 0.06 6.32 +116 2 0.99 11.99 0.41 +117 2 4.54 10.95 5.79 +118 2 10.94 6.29 1.65 +119 2 2.52 10.55 9.76 +120 2 4.74 2.41 2.42 +121 2 8.84 8.27 11.56 +122 2 10.89 6.06 9.58 +123 2 2.15 5.99 11.18 +124 2 0.49 9.06 9.13 +125 2 8.69 12.34 8.15 +126 2 1.76 1.44 4.07 +127 2 10.05 4.44 4.1 +128 2 1.44 8.49 7.32 +129 
2 12.25 3.32 1.68 +130 2 6.27 6.22 1.56 +131 2 11.5 7.85 3.14 +132 2 2.96 3.44 4.61 +133 2 5.04 11.17 11.3 +134 2 7.6 12.39 3.03 +135 2 5.94 9.68 9.37 +136 2 4.93 11.46 0.96 +137 2 8.92 11.68 1.69 +138 2 12.07 1.36 9.54 +139 2 4.17 7.28 11.8 +140 2 2.67 3.09 1.05 +141 2 11.89 10.59 5.56 +142 2 4.27 7.23 2.16 +143 2 10.8 9.22 10.18 +144 2 10.68 6.2 5.44 +145 2 1.51 4.9 0.53 +146 2 1.94 5.36 4.61 +147 2 10.02 9.64 2.47 +148 2 8.41 6.43 11.04 +149 2 8.58 4.29 2.01 +150 2 3.12 4.28 8.41 +151 2 5.12 5.54 6.6 +152 2 3.97 9.58 3.89 +153 2 6.17 3.63 3.9 +154 2 11.4 2.92 10.99 +155 2 5.96 8.02 4.26 +156 2 6.81 5.9 9.83 +157 2 1.83 6.59 6.16 +158 2 10.19 8.36 6.88 +159 2 8.74 4.67 7.12 +160 2 4.84 1.08 11.38 +161 2 2.38 11.71 3.7 +162 2 2.02 6.1 8.92 +163 2 6.05 8.48 0.61 +164 2 12.12 7.05 10.81 +165 2 6.81 3.46 7.31 +166 2 7.52 1.99 9.1 +167 2 1.25 2.26 6.18 +168 2 9.31 1.19 6.35 +169 2 8.84 2.9 11.69 +170 2 1.39 9.44 0.85 +171 2 12.13 3.22 7.5 +172 2 11.38 0.95 1.34 +173 2 7.43 1.82 1.24 +174 2 9.15 9.43 8.28 +175 2 2.97 11.66 6.88 +176 2 7.5 10.43 4.47 +177 2 6.69 9.4 7.47 +178 2 7.29 10.77 10.93 +179 2 5.45 1.07 5.12 +180 2 1.92 11.57 1.62 +181 2 5.05 10.55 7.26 +182 2 9.73 5.52 1.11 +183 2 1.73 11.85 9.82 +184 2 6.02 1.57 2.67 +185 2 9.34 7.2 0.21 +186 2 10.71 6.56 7.97 +187 2 1.86 7.03 12.37 +188 2 1.08 9.16 10.59 +189 2 10.19 12.39 8.15 +190 2 0.92 2.56 3.28 +191 2 9.34 3.01 4.17 +192 2 1.11 10.09 7.21 diff --git a/executables/deepmd/dp_lmp.yml b/executables/deepmd/dp_lmp.yml new file mode 100644 index 00000000..c7b6dbd6 --- /dev/null +++ b/executables/deepmd/dp_lmp.yml @@ -0,0 +1,15 @@ +isDefault: false +monitors: + - standard_output +results: [] +flavors: + dp_lmp: + input: + - name: + - in.lammps + - structure.lmp + results: [] + monitors: + - standard_output + applicationName: deepmd + executableName: dp_lmp # mpirun -np $NP lmp -in in.lammps diff --git a/templates/deepmd/dp_lmp.yml b/templates/deepmd/dp_lmp.yml new file mode 100644 index 00000000..f9e42f59 --- /dev/null +++ b/templates/deepmd/dp_lmp.yml @@ -0,0 +1,11 @@ +- content: !readFile 'assets/deepmd/in.lammps' + name: in.lammps + contextProviders: [] + applicationName: deepmd + executableName: dp_lmp + +- content: !readFile 'assets/deepmd/structure.lmp' + name: structure.lmp + contextProviders: [] + applicationName: deepmd + executableName: dp_lmp From a1773332dd414e501de11215e1b31cf9a4282113 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Tue, 9 Jan 2024 08:00:54 +0800 Subject: [PATCH 08/20] SOF-7194: fix lammps input template array --- executables/deepmd/dp_lmp.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/executables/deepmd/dp_lmp.yml b/executables/deepmd/dp_lmp.yml index c7b6dbd6..83a1c5bb 100644 --- a/executables/deepmd/dp_lmp.yml +++ b/executables/deepmd/dp_lmp.yml @@ -5,9 +5,8 @@ results: [] flavors: dp_lmp: input: - - name: - - in.lammps - - structure.lmp + - in.lammps + - structure.lmp results: [] monitors: - standard_output From 40b94068be6f9cb1915021f1ddd1a66d07718fcc Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Tue, 9 Jan 2024 08:44:15 +0800 Subject: [PATCH 09/20] SOF-7194: include lammps in executable tree --- executables/tree.yml | 1 + src/js/data/templates.js | 2 +- src/js/data/tree.js | 2 +- templates/templates.yml | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/executables/tree.yml b/executables/tree.yml index 66f8f5df..8945c1fa 100644 --- a/executables/tree.yml +++ 
b/executables/tree.yml @@ -1,4 +1,5 @@ deepmd: + dp_lmp: !include 'executables/deepmd/dp_lmp.yml' dp_prepare: !include 'executables/deepmd/dp_prepare.yml' dp_train: !include 'executables/deepmd/dp_train.yml' espresso: diff --git a/src/js/data/templates.js b/src/js/data/templates.js index 3075e391..d2e94b35 100644 --- a/src/js/data/templates.js +++ b/src/js/data/templates.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {allTemplates: [{"content":"import dpdata\nimport numpy as np\n\n# https://docs.deepmodeling.com/projects/dpdata/en/master/formats/QECPTrajFormat.html\ndata = dpdata.LabeledSystem(\"generate_dft_data\", fmt=\"qe/cp/traj\")\nprint(\"Dataset contains total {0} frames\".format(len(data)))\n\n# randomly choose 20% index for validation_data\nsize = len(data)\nsize_validation = round(size * 0.2)\n\nindex_validation = np.random.choice(size, size=size_validation, replace=False)\nindex_training = list(set(range(size)) - set(index_validation))\n\ndata_training = data.sub_system(index_training)\ndata_validation = data.sub_system(index_validation)\n\nprint(\"Using {0} frames as training set\".format(len(data_training)))\nprint(\"Using {0} frames as validation set\".format(len(data_validation)))\n\n# save training and validation sets\ndata_training.to_deepmd_npy(\"./training\")\ndata_validation.to_deepmd_npy(\"./validation\")\n","name":"qe_cp_to_deepmd.py","contextProviders":[],"applicationName":"deepmd","executableName":"dp_prepare"},{"content":"{\n \"model\": {\n \"type_map\": [\n \"O\",\n \"H\"\n ],\n \"descriptor\": {\n \"type\": \"se_e2_r\",\n \"sel\": [\n 23,\n 46\n ],\n \"rcut_smth\": 0.50,\n \"rcut\": 5.00,\n \"neuron\": [\n 5,\n 5,\n 5\n ],\n \"resnet_dt\": false,\n \"seed\": 1\n },\n \"fitting_net\": {\n \"neuron\": [\n 60,\n 60,\n 60\n ],\n \"resnet_dt\": true,\n \"seed\": 1\n }\n },\n \"learning_rate\": {\n \"type\": \"exp\",\n \"decay_steps\": 1000,\n \"start_lr\": 0.005,\n \"stop_lr\": 3.51e-6\n },\n \"loss\": {\n \"start_pref_e\": 0.02,\n \"limit_pref_e\": 1,\n \"start_pref_f\": 1000,\n \"limit_pref_f\": 1,\n \"start_pref_v\": 0,\n \"limit_pref_v\": 0\n },\n \"training\": {\n \"training_data\": {\n \"systems\": [\n \"./training/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"validation_data\": {\n \"systems\": [\n \"./validation/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"numb_steps\": 301,\n \"seed\": 1,\n \"disp_file\": \"lcurve.out\",\n \"disp_freq\": 10,\n \"numb_test\": 1,\n \"save_freq\": 100\n }\n}\n","name":"dp_train_se_e2_r.json","contextProviders":[],"applicationName":"deepmd","executableName":"dp_train"},{"content":"1\npp.dat\n1.0\n3000\n3\n3.0000\n","name":"average.in","contextProviders":[],"applicationName":"espresso","executableName":"average.x"},{"content":"&BANDS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n filband = {% raw %}'{{ JOB_WORK_DIR }}/bands.dat'{% endraw %}\n no_overlap = .true.\n/\n\n","name":"bands.in","contextProviders":[],"applicationName":"espresso","executableName":"bands.x"},{"content":"&DOS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n/\n\n","name":"dos.in","contextProviders":[],"applicationName":"espresso","executableName":"dos.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n 
/\n","name":"dynmat_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"dynmat.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! truncation (used for both correlation and exchange)\n truncation = '2d'\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of the Coulomb solver\n thres_coul = 1.0d-2\n\n ! configuration of W in the convolution\n model_coul = 'godby-needs'\n max_freq_coul = 120\n num_freq_coul = 35\n\n ! configuration of the Green solver\n thres_green = 1.0d-3\n max_iter_green = 300\n\n ! configuration for the correlation self energy\n ecut_corr = 5.0\n max_freq_corr = 100.0\n num_freq_corr = 11\n\n ! configuration for the exchange self energy\n ecut_exch = 20.0\n\n ! configuration for the output\n eta = 0.1\n min_freq_wind = -30.0\n max_freq_wind = 30.0\n num_freq_wind = 601\n/\n\n&gw_output\n/\n\nFREQUENCIES\n2\n 0.0 0.0\n 0.0 10.0\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_plasmon_pole.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of W in the convolution\n max_freq_coul = 200\n num_freq_coul = 51\n\n ! configuration for the correlation self energy\n ecut_corr = 6.0\n\n ! 
configuration for the exchange self energy\n ecut_exch = 15.0\n/\n\n&gw_output\n/\n\nFREQUENCIES\n35\n 0.0 0.0\n 0.0 0.3\n 0.0 0.9\n 0.0 1.8\n 0.0 3.0\n 0.0 4.5\n 0.0 6.3\n 0.0 8.4\n 0.0 10.8\n 0.0 13.5\n 0.0 16.5\n 0.0 19.8\n 0.0 23.4\n 0.0 27.3\n 0.0 31.5\n 0.0 36.0\n 0.0 40.8\n 0.0 45.9\n 0.0 51.3\n 0.0 57.0\n 0.0 63.0\n 0.0 69.3\n 0.0 75.9\n 0.0 82.8\n 0.0 90.0\n 0.0 97.5\n 0.0 105.3\n 0.0 113.4\n 0.0 121.8\n 0.0 130.5\n 0.0 139.5\n 0.0 148.8\n 0.0 158.4\n 0.0 168.3\n 0.0 178.5\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_full_frequency.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"matdyn_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc ='force_constants.fc'\n flfrq ='frequencies.freq'\n flvec ='normal_modes.out'\n q_in_band_form = .true.\n /\n{{ipath.length}}\n{% for point in ipath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"matdyn_path.in","contextProviders":[{"name":"IPathFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"BEGIN\nBEGIN_PATH_INPUT\n&PATH\n restart_mode = 'from_scratch'\n string_method = 'neb',\n nstep_path = 50,\n ds = 2.D0,\n opt_scheme = \"broyden\",\n num_of_images = {{ 2 + (input.INTERMEDIATE_IMAGES.length || neb.nImages) }},\n k_max = 0.3D0,\n k_min = 0.2D0,\n CI_scheme = \"auto\",\n path_thr = 0.1D0,\n/\nEND_PATH_INPUT\nBEGIN_ENGINE_INPUT\n&CONTROL\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.03\n nspin = 2\n starting_magnetization = 0.5\n/\n&ELECTRONS\n conv_thr = 1.D-8\n mixing_beta = 0.3\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nBEGIN_POSITIONS\nFIRST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.FIRST_IMAGE }}\n{%- for IMAGE in input.INTERMEDIATE_IMAGES %}\nINTERMEDIATE_IMAGE\nATOMIC_POSITIONS crystal\n{{ IMAGE }}\n{%- endfor %}\nLAST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.LAST_IMAGE }}\nEND_POSITIONS\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nEND_ENGINE_INPUT\nEND\n","name":"neb.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"NEBFormDataManager"},{"name":"QENEBInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"neb.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n ldisp = .true.\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor 
%}\n/\n","name":"ph_grid.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n{% for point in qpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor %}\n{% endfor %}\n","name":"ph_path.in","contextProviders":[{"name":"QPathFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n0 0 0\n","name":"ph_gamma.in","contextProviders":[],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .false.\n start_irr = 0\n last_irr = 0\n ldisp = .true.\n fildyn = 'dyn0'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_init_qpoints.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .true.\n ldisp = .true.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid_restart.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18\n ldisp = .true.\n {% raw -%}\n start_q = {{MAP_DATA.qpoint}}\n last_q = {{MAP_DATA.qpoint}}\n start_irr = {{MAP_DATA.irr}}\n last_irr= {{MAP_DATA.irr}}\n {%- endraw %}\n recover = .true.\n fildyn = 'dyn'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_SCRATCH_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_single_irr_qpt.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 0\n/\n&PLOT\n iflag = 3\n output_format = 5\n fileout ='density.xsf'\n/\n\n","name":"pp_density.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 11\n/\n","name":"pp_electrostatic_potential.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&PROJWFC\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n deltaE = 0.05\n/\n","name":"projwfc.in","contextProviders":[],"applicationName":"espresso","executableName":"projwfc.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV 
}}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n input_dft = 'hse',\n {% for d in qgrid.dimensions -%}\n nqx{{loop.index}} = {{d}}\n {% endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal\n{{ '{{' }} {{ explicitKPath.length }} {% raw %} + KPOINTS|length }} {% endraw %}\n{%- raw %}\n{% for point in KPOINTS -%}\n {% for d in point.coordinates %}{{ \"%14.9f\"|format(d) }} {% endfor -%}{{ point.weight }}\n{% endfor %}\n{% endraw -%}\n{% for point in explicitKPath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}0.0000001\n{% endfor %}\n","name":"pw_scf_bands_hse.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPathFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS 
}}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% raw %}{{PARAMETER | default('1')}} {{PARAMETER | default('1')}} {{PARAMETER | default('1')}} 0 0 0{% endraw %}\n","name":"pw_scf_kpt_conv.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'nscf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n 
ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n{%- if subworkflowContext.NO_SYMMETRY_NO_INVERSION %}\n nosym = .true.\n noinv = .true.\n{%- endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_nscf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n nstep = 50\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'vc-relax'\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_vc_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'bands'\n title = ''\n verbosity = 'low'\n restart_mode = 
'{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal_b\n{{kpath.length}}\n{% for point in kpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"pw_bands.in","contextProviders":[{"name":"KPathFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n ecutfock = 100\n occupations = 'smearing'\n degauss = 0.005\n input_dft='hse',\n nqx1 = {% if kgrid.dimensions[0]%2 == 0 %}{{kgrid.dimensions[0]/2}}{% else %}{{(kgrid.dimensions[0]+1)/2}}{% endif %}, nqx2 = {% if kgrid.dimensions[1]%2 == 0 %}{{kgrid.dimensions[1]/2}}{% else %}{{(kgrid.dimensions[1]+1)/2}}{% endif %}, nqx3 = {% if kgrid.dimensions[2]%2 == 0 %}{{kgrid.dimensions[2]/2}}{% else %}{{(kgrid.dimensions[2]+1)/2}}{% endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{% if d%2 == 0 %}{{d}} {% else %}{{d+1}} {% endif %}{% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf_hse.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ 
input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n","name":"pw_scf_dft_u.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n{% for row in hubbard_v -%}\nV {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.atomicSpecies2 }}-{{ row.atomicOrbital2 }} {{ row.siteIndex }} {{ row.siteIndex2 }} {{ row.hubbardVValue }}\n{% endfor -%}\n","name":"pw_scf_dft_v.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"},{"name":"HubbardVContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 
'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_j -%}\n{{ row.paramType }} {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.value }}\n{% endfor -%}\n","name":"pw_scf_dft_j.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardJContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n lda_plus_u = .true.\n lda_plus_u_kind = 0\n U_projection_type = 'ortho-atomic'\n {%- for row in hubbard_legacy %}\n Hubbard_U({{ row.atomicSpeciesIndex }}) = {{ row.hubbardUValue }}\n {%- endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf_dft_u_legacy.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardContextManagerLegacy"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&INPUT\n fildyn = 'dyn'\n zasr = 'simple'\n flfrc = 'force_constants.fc'\n/\n","name":"q2r.in","contextProviders":[],"applicationName":"espresso","executableName":"q2r.x"},{"content":"&inputhp\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {%- for d in qgrid.dimensions %}\n nq{{ loop.index }} = {{ d }}\n {%- endfor %}\n/\n","name":"hp.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"hp.x"},{"content":"&inputpp\n calculation = \"eps\"\n prefix = \"__prefix__\"\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n\n&energy_grid\n smeartype = \"gauss\"\n intersmear = 0.2\n intrasmear = 0.0\n wmin = 0.0\n wmax = 30.0\n nw = 500\n shift = 0.0\n/\n\n","name":"epsilon.in","contextProviders":[],"applicationName":"espresso","executableName":"epsilon.x"},{"content":"# ------------------------------------------------------------------------------- #\n# #\n# Example JupyterLab requirements #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. 
A Python virtual environment is created #\n# - in /scratch/$USER/$JOB_ID (for build: 'Default') #\n# - in /export/share/python/ (for build: 'with-pre-installed-packages') #\n# 3. This list is used to populate a Python virtual environment #\n# 4. JupyterLab is started #\n# #\n# For more information visit: #\n# - https://jupyterlab.readthedocs.io/en/stable/index.html #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Note: With the JupyterLab build 'with-pre-installed-packages', packages #\n# cannot be added during the notebook runtime. #\n# #\n# ------------------------------------------------------------------------------- #\n\njupyterlab==3.0.3\nnotebook>=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 
'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. 
#\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(target_range)),\n max(max(predictions_range), max(predictions_range)))\n plt.xlim = (limits[0], limits[1])\n plt.ylim = (limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train an test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR <=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. 
#\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. #\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. 
The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n 
ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. #\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(target_range)),\n max(max(predictions_range), max(predictions_range)))\n plt.xlim = (limits[0], limits[1])\n plt.ylim = (limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train an test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR < Date: Tue, 9 Jan 2024 19:07:37 +0800 Subject: [PATCH 10/20] SOF-7194: fix lammps input files array --- executables/deepmd/dp_lmp.yml | 4 ++-- src/js/data/tree.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/executables/deepmd/dp_lmp.yml b/executables/deepmd/dp_lmp.yml index 83a1c5bb..39dd0f78 100644 --- a/executables/deepmd/dp_lmp.yml +++ b/executables/deepmd/dp_lmp.yml @@ -5,8 +5,8 @@ results: [] flavors: dp_lmp: input: - - in.lammps - - structure.lmp + - name: in.lammps + - name: structure.lmp results: [] monitors: - standard_output diff --git a/src/js/data/tree.js b/src/js/data/tree.js index 7fa0c139..a0cecc32 100644 --- a/src/js/data/tree.js +++ b/src/js/data/tree.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {applicationTree: 
{"deepmd":{"dp_lmp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_lmp":{"input":["in.lammps","structure.lmp"],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_lmp"}}},"dp_prepare":{"isDefault":true,"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"isDefault":true,"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_prepare"}}},"dp_train":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.json"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_train"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input":[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"matdyn.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"],"monitors":["stan
dard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_init_qpoints":{"input":[{"name":"ph_init_qpoints.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reaction_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions","total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_scf_hse.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tens
or","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legacy":{"input":[{"name":"pw_scf_dft_u_legacy.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors":["standard_output"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationName":"jupyterLab","e
xecutableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"score"}}},"train":{"isDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test_split_sklearn.py","templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"py
thon","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sklearn":{"input":[{
"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charg
e_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espresso_pw_scf.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"m
onitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}} +module.exports = {applicationTree: 
{"deepmd":{"dp_lmp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_lmp":{"input":[{"name":"in.lammps"},{"name":"structure.lmp"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_lmp"}}},"dp_prepare":{"isDefault":true,"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"isDefault":true,"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_prepare"}}},"dp_train":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.json"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_train"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input":[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"matdyn.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"]
,"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_init_qpoints":{"input":[{"name":"ph_init_qpoints.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reaction_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions","total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_scf_hse.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pres
sure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legacy":{"input":[{"name":"pw_scf_dft_u_legacy.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors":["standard_output"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationNam
e":"jupyterLab","executableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"score"}}},"train":{"isDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test_split_sklearn.py","templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"ap
plicationName":"python","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sk
learn":{"input":[{"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potenti
al_profile","charge_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espresso_pw_scf.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","fi
nal_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}} From 30bfe3ac1a9fc57fab8728f2ff00a5e81b150c3d Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Tue, 9 Jan 2024 20:17:59 +0800 Subject: [PATCH 11/20] SOF-7194: fix qe input file prefix --- assets/deepmd/qe_cp_to_deepmd.j2.py | 2 +- executables/deepmd/dp_lmp.yml | 2 +- src/js/data/templates.js | 2 +- src/js/data/tree.js | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/deepmd/qe_cp_to_deepmd.j2.py b/assets/deepmd/qe_cp_to_deepmd.j2.py index 50bc1130..d092102b 100644 --- a/assets/deepmd/qe_cp_to_deepmd.j2.py +++ 
b/assets/deepmd/qe_cp_to_deepmd.j2.py @@ -2,7 +2,7 @@ import numpy as np # https://docs.deepmodeling.com/projects/dpdata/en/master/formats/QECPTrajFormat.html -data = dpdata.LabeledSystem("generate_dft_data", fmt="qe/cp/traj") +data = dpdata.LabeledSystem("cp", fmt="qe/cp/traj") print("Dataset contains total {0} frames".format(len(data))) # randomly choose 20% index for validation_data diff --git a/executables/deepmd/dp_lmp.yml b/executables/deepmd/dp_lmp.yml index 39dd0f78..1a2e71ca 100644 --- a/executables/deepmd/dp_lmp.yml +++ b/executables/deepmd/dp_lmp.yml @@ -3,7 +3,7 @@ monitors: - standard_output results: [] flavors: - dp_lmp: + lammps: input: - name: in.lammps - name: structure.lmp diff --git a/src/js/data/templates.js b/src/js/data/templates.js index d2e94b35..8b0ff686 100644 --- a/src/js/data/templates.js +++ b/src/js/data/templates.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {allTemplates: [{"content":"# LAMMPS input, deepmd-kit version. Built from bulk water calculation.\n\nunits metal\nboundary p p p\natom_style atomic\n\nneighbor 2.0 bin\nneigh_modify every 10 delay 0 check no\n\nread_data structure.lmp\nmass 1 16\nmass 2 2\n\npair_style\t deepmd ./graph.pb\npair_coeff * *\n\nvelocity all create 330.0 23456789\n\nfix 1 all nvt temp 330.0 330.0 0.5\ntimestep 0.0005\nthermo_style custom step pe ke etotal temp press vol\nthermo 100\ndump 1 all custom 100 structure.dump id type x y z\n\nrun 100\n","name":"in.lammps","contextProviders":[],"applicationName":"deepmd","executableName":"dp_lmp"},{"content":"# LAMMPS data\n192 atoms\n2 atom types\n0.0 12.44470 xlo xhi\n0.0 12.44470 ylo yhi\n0.0 12.44470 zlo zhi\n0.0 0.0 0.0 xy xz yz\n\nAtoms # metal\n\n1\t1\t11.83 2.56 2.18\n2\t1\t5.36 6 1.81\n3\t1\t12.17 7.34 2.71\n4\t1\t3.38 4.28 4.84\n5\t1\t4.54 11.73 11.9\n6\t1\t7.34 0.18 2.18\n7\t1\t5.73 9.9 10.31\n8\t1\t5.32 11.19 1.83\n9\t1\t9.39 11.14 1.1\n10\t1\t12.06 0.64 8.92\n11\t1\t5.01 7.72 11.74\n12\t1\t3.64 3.19 1.08\n13\t1\t11.88 10.9 4.7\n14\t1\t3.88 8.05 2.51\n15\t1\t9.82 9.16 10.18\n16\t1\t11 5.31 5.17\n17\t1\t0.97 4.12 0.84\n18\t1\t1.24 5.99 4.76\n19\t1\t10.1 9.09 3.32\n20\t1\t8.39 5.72 10.39\n21\t1\t8.06 4.72 1.3\n22\t1\t3.23 3.34 8.26\n23\t1\t5.02 6.21 7.29\n24\t1\t3.94 10.55 3.95\n25\t1\t6.25 4.56 4.21\n26\t1\t12.18 2.35 11.11\n27\t1\t6.76 8.57 4.07\n28\t1\t5.87 5.84 9.79\n29\t1\t2.18 6.91 6.99\n30\t1\t10.89 7.69 6.78\n31\t1\t8.03 4.28 6.57\n32\t1\t5.29 1.92 11.56\n33\t1\t1.53 12.16 3.45\n34\t1\t2.04 5.49 9.71\n35\t1\t6.66 9.05 1.21\n36\t1\t11.75 6.18 11.01\n37\t1\t6.11 3.07 7.96\n38\t1\t8.22 1.38 9.43\n39\t1\t1.31 2.02 7.09\n40\t1\t9.48 2.11 5.92\n41\t1\t9.33 3.34 10.97\n42\t1\t1.31 8.49 0.81\n43\t1\t11.25 3.48 7.51\n44\t1\t11.7 0.33 0.6\n45\t1\t7.63 2.04 0.28\n46\t1\t8.86 9.52 7.4\n47\t1\t2.01 11.54 6.89\n48\t1\t8.02 11.27 4.66\n49\t1\t5.82 9.18 7.71\n50\t1\t8.11 11.19 11.22\n51\t1\t5.94 0.84 5.91\n52\t1\t1.88 11.66 0.64\n53\t1\t4.66 11.28 6.73\n54\t1\t10.32 6.26 0.89\n55\t1\t2.01 11.07 10.33\n56\t1\t5.36 2.06 3.14\n57\t1\t8.56 7.59 12.17\n58\t1\t10.51 5.81 8.66\n59\t1\t2.37 6.23 12.1\n60\t1\t0.3 8.77 10.05\n61\t1\t9.47 12.13 7.57\n62\t1\t1.41 2.32 4.17\n63\t1\t9.69 3.69 3.56\n64\t1\t0.75 9.22 7.39\n65\t2\t11.09 2.87 2.74\n66\t2\t5.51 5.51 2.6\n67\t2\t0.24 6.8 3.29\n68\t2\t4.28 4.19 4.46\n69\t2\t3.63 11.56 11.76\n70\t2\t6.53 12.07 1.99\n71\t2\t5.37 9.14 10.85\n72\t2\t5.74 10.31 1.64\n73\t2\t8.71 11.07 0.41\n74\t2\t0.27 1.04 8.27\n75\t2\t5.46 7.08 11.15\n76\t2\t3.84 4.17 1.06\n77\t2\t11.55 10.19 4.15\n78\t2\t3 8.17 2.14\n79\t2\t9.53 10.04 10.56\n80\t2\t11.18 4.84 
6.05\n81\t2\t0.68 3.76 12.44\n82\t2\t0.43 5.51 4.88\n83\t2\t9.51 9.48 3.94\n84\t2\t9.08 5.89 9.68\n85\t2\t7.92 3.83 0.8\n86\t2\t4.23 3.22 8.21\n87\t2\t5.27 5.9 8.19\n88\t2\t4.44 10.9 3.14\n89\t2\t6.93 4.56 4.89\n90\t2\t11.88 1.61 11.75\n91\t2\t6.79 8.54 3.09\n92\t2\t5.62 4.99 10.13\n93\t2\t3.09 6.66 6.98\n94\t2\t11.73 8.22 6.86\n95\t2\t8.4 3.45 6.18\n96\t2\t4.64 2.37 12.16\n97\t2\t0.74 11.69 3.86\n98\t2\t1.1 5.3 9.72\n99\t2\t7.54 9.11 0.83\n100\t2\t11.36 6.2 11.9\n101\t2\t5.85 2.21 7.53\n102\t2\t8.81 2.01 9.89\n103\t2\t2.17 2.43 7.46\n104\t2\t10.1 2.53 6.55\n105\t2\t9.07 4.29 10.91\n106\t2\t0.64 8.13 1.45\n107\t2\t11.12 4.2 8.14\n108\t2\t11.03 12.03 0.58\n109\t2\t6.83 1.84 12.19\n110\t2\t9.13 10.45 7.25\n111\t2\t1.76 12.44 6.8\n112\t2\t7.37 11.88 5.08\n113\t2\t5.59 8.29 7.44\n114\t2\t8.14 12.12 10.86\n115\t2\t5.48 0.06 6.32\n116\t2\t0.99 11.99 0.41\n117\t2\t4.54 10.95 5.79\n118\t2\t10.94 6.29 1.65\n119\t2\t2.52 10.55 9.76\n120\t2\t4.74 2.41 2.42\n121\t2\t8.84 8.27 11.56\n122\t2\t10.89 6.06 9.58\n123\t2\t2.15 5.99 11.18\n124\t2\t0.49 9.06 9.13\n125\t2\t8.69 12.34 8.15\n126\t2\t1.76 1.44 4.07\n127\t2\t10.05 4.44 4.1\n128\t2\t1.44 8.49 7.32\n129\t2\t12.25 3.32 1.68\n130\t2\t6.27 6.22 1.56\n131\t2\t11.5 7.85 3.14\n132\t2\t2.96 3.44 4.61\n133\t2\t5.04 11.17 11.3\n134\t2\t7.6 12.39 3.03\n135\t2\t5.94 9.68 9.37\n136\t2\t4.93 11.46 0.96\n137\t2\t8.92 11.68 1.69\n138\t2\t12.07 1.36 9.54\n139\t2\t4.17 7.28 11.8\n140\t2\t2.67 3.09 1.05\n141\t2\t11.89 10.59 5.56\n142\t2\t4.27 7.23 2.16\n143\t2\t10.8 9.22 10.18\n144\t2\t10.68 6.2 5.44\n145\t2\t1.51 4.9 0.53\n146\t2\t1.94 5.36 4.61\n147\t2\t10.02 9.64 2.47\n148\t2\t8.41 6.43 11.04\n149\t2\t8.58 4.29 2.01\n150\t2\t3.12 4.28 8.41\n151\t2\t5.12 5.54 6.6\n152\t2\t3.97 9.58 3.89\n153\t2\t6.17 3.63 3.9\n154\t2\t11.4 2.92 10.99\n155\t2\t5.96 8.02 4.26\n156\t2\t6.81 5.9 9.83\n157\t2\t1.83 6.59 6.16\n158\t2\t10.19 8.36 6.88\n159\t2\t8.74 4.67 7.12\n160\t2\t4.84 1.08 11.38\n161\t2\t2.38 11.71 3.7\n162\t2\t2.02 6.1 8.92\n163\t2\t6.05 8.48 0.61\n164\t2\t12.12 7.05 10.81\n165\t2\t6.81 3.46 7.31\n166\t2\t7.52 1.99 9.1\n167\t2\t1.25 2.26 6.18\n168\t2\t9.31 1.19 6.35\n169\t2\t8.84 2.9 11.69\n170\t2\t1.39 9.44 0.85\n171\t2\t12.13 3.22 7.5\n172\t2\t11.38 0.95 1.34\n173\t2\t7.43 1.82 1.24\n174\t2\t9.15 9.43 8.28\n175\t2\t2.97 11.66 6.88\n176\t2\t7.5 10.43 4.47\n177\t2\t6.69 9.4 7.47\n178\t2\t7.29 10.77 10.93\n179\t2\t5.45 1.07 5.12\n180\t2\t1.92 11.57 1.62\n181\t2\t5.05 10.55 7.26\n182\t2\t9.73 5.52 1.11\n183\t2\t1.73 11.85 9.82\n184\t2\t6.02 1.57 2.67\n185\t2\t9.34 7.2 0.21\n186\t2\t10.71 6.56 7.97\n187\t2\t1.86 7.03 12.37\n188\t2\t1.08 9.16 10.59\n189\t2\t10.19 12.39 8.15\n190\t2\t0.92 2.56 3.28\n191\t2\t9.34 3.01 4.17\n192\t2\t1.11 10.09 7.21\n","name":"structure.lmp","contextProviders":[],"applicationName":"deepmd","executableName":"dp_lmp"},{"content":"import dpdata\nimport numpy as np\n\n# https://docs.deepmodeling.com/projects/dpdata/en/master/formats/QECPTrajFormat.html\ndata = dpdata.LabeledSystem(\"generate_dft_data\", fmt=\"qe/cp/traj\")\nprint(\"Dataset contains total {0} frames\".format(len(data)))\n\n# randomly choose 20% index for validation_data\nsize = len(data)\nsize_validation = round(size * 0.2)\n\nindex_validation = np.random.choice(size, size=size_validation, replace=False)\nindex_training = list(set(range(size)) - set(index_validation))\n\ndata_training = data.sub_system(index_training)\ndata_validation = data.sub_system(index_validation)\n\nprint(\"Using {0} frames as training set\".format(len(data_training)))\nprint(\"Using {0} frames as 
validation set\".format(len(data_validation)))\n\n# save training and validation sets\ndata_training.to_deepmd_npy(\"./training\")\ndata_validation.to_deepmd_npy(\"./validation\")\n","name":"qe_cp_to_deepmd.py","contextProviders":[],"applicationName":"deepmd","executableName":"dp_prepare"},{"content":"{\n \"model\": {\n \"type_map\": [\n \"O\",\n \"H\"\n ],\n \"descriptor\": {\n \"type\": \"se_e2_r\",\n \"sel\": [\n 23,\n 46\n ],\n \"rcut_smth\": 0.50,\n \"rcut\": 5.00,\n \"neuron\": [\n 5,\n 5,\n 5\n ],\n \"resnet_dt\": false,\n \"seed\": 1\n },\n \"fitting_net\": {\n \"neuron\": [\n 60,\n 60,\n 60\n ],\n \"resnet_dt\": true,\n \"seed\": 1\n }\n },\n \"learning_rate\": {\n \"type\": \"exp\",\n \"decay_steps\": 1000,\n \"start_lr\": 0.005,\n \"stop_lr\": 3.51e-6\n },\n \"loss\": {\n \"start_pref_e\": 0.02,\n \"limit_pref_e\": 1,\n \"start_pref_f\": 1000,\n \"limit_pref_f\": 1,\n \"start_pref_v\": 0,\n \"limit_pref_v\": 0\n },\n \"training\": {\n \"training_data\": {\n \"systems\": [\n \"./training/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"validation_data\": {\n \"systems\": [\n \"./validation/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"numb_steps\": 301,\n \"seed\": 1,\n \"disp_file\": \"lcurve.out\",\n \"disp_freq\": 10,\n \"numb_test\": 1,\n \"save_freq\": 100\n }\n}\n","name":"dp_train_se_e2_r.json","contextProviders":[],"applicationName":"deepmd","executableName":"dp_train"},{"content":"1\npp.dat\n1.0\n3000\n3\n3.0000\n","name":"average.in","contextProviders":[],"applicationName":"espresso","executableName":"average.x"},{"content":"&BANDS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n filband = {% raw %}'{{ JOB_WORK_DIR }}/bands.dat'{% endraw %}\n no_overlap = .true.\n/\n\n","name":"bands.in","contextProviders":[],"applicationName":"espresso","executableName":"bands.x"},{"content":"&DOS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n/\n\n","name":"dos.in","contextProviders":[],"applicationName":"espresso","executableName":"dos.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"dynmat_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"dynmat.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! truncation (used for both correlation and exchange)\n truncation = '2d'\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of the Coulomb solver\n thres_coul = 1.0d-2\n\n ! configuration of W in the convolution\n model_coul = 'godby-needs'\n max_freq_coul = 120\n num_freq_coul = 35\n\n ! configuration of the Green solver\n thres_green = 1.0d-3\n max_iter_green = 300\n\n ! configuration for the correlation self energy\n ecut_corr = 5.0\n max_freq_corr = 100.0\n num_freq_corr = 11\n\n ! configuration for the exchange self energy\n ecut_exch = 20.0\n\n ! 
configuration for the output\n eta = 0.1\n min_freq_wind = -30.0\n max_freq_wind = 30.0\n num_freq_wind = 601\n/\n\n&gw_output\n/\n\nFREQUENCIES\n2\n 0.0 0.0\n 0.0 10.0\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_plasmon_pole.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of W in the convolution\n max_freq_coul = 200\n num_freq_coul = 51\n\n ! configuration for the correlation self energy\n ecut_corr = 6.0\n\n ! configuration for the exchange self energy\n ecut_exch = 15.0\n/\n\n&gw_output\n/\n\nFREQUENCIES\n35\n 0.0 0.0\n 0.0 0.3\n 0.0 0.9\n 0.0 1.8\n 0.0 3.0\n 0.0 4.5\n 0.0 6.3\n 0.0 8.4\n 0.0 10.8\n 0.0 13.5\n 0.0 16.5\n 0.0 19.8\n 0.0 23.4\n 0.0 27.3\n 0.0 31.5\n 0.0 36.0\n 0.0 40.8\n 0.0 45.9\n 0.0 51.3\n 0.0 57.0\n 0.0 63.0\n 0.0 69.3\n 0.0 75.9\n 0.0 82.8\n 0.0 90.0\n 0.0 97.5\n 0.0 105.3\n 0.0 113.4\n 0.0 121.8\n 0.0 130.5\n 0.0 139.5\n 0.0 148.8\n 0.0 158.4\n 0.0 168.3\n 0.0 178.5\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_full_frequency.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"matdyn_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc ='force_constants.fc'\n flfrq ='frequencies.freq'\n flvec ='normal_modes.out'\n q_in_band_form = .true.\n /\n{{ipath.length}}\n{% for point in ipath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"matdyn_path.in","contextProviders":[{"name":"IPathFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"BEGIN\nBEGIN_PATH_INPUT\n&PATH\n restart_mode = 'from_scratch'\n string_method = 'neb',\n nstep_path = 50,\n ds = 2.D0,\n opt_scheme = \"broyden\",\n num_of_images = {{ 2 + (input.INTERMEDIATE_IMAGES.length || neb.nImages) }},\n k_max = 0.3D0,\n k_min = 0.2D0,\n CI_scheme = \"auto\",\n path_thr = 0.1D0,\n/\nEND_PATH_INPUT\nBEGIN_ENGINE_INPUT\n&CONTROL\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.03\n nspin = 
2\n starting_magnetization = 0.5\n/\n&ELECTRONS\n conv_thr = 1.D-8\n mixing_beta = 0.3\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nBEGIN_POSITIONS\nFIRST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.FIRST_IMAGE }}\n{%- for IMAGE in input.INTERMEDIATE_IMAGES %}\nINTERMEDIATE_IMAGE\nATOMIC_POSITIONS crystal\n{{ IMAGE }}\n{%- endfor %}\nLAST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.LAST_IMAGE }}\nEND_POSITIONS\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nEND_ENGINE_INPUT\nEND\n","name":"neb.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"NEBFormDataManager"},{"name":"QENEBInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"neb.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n ldisp = .true.\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n{% for point in qpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor %}\n{% endfor %}\n","name":"ph_path.in","contextProviders":[{"name":"QPathFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n0 0 0\n","name":"ph_gamma.in","contextProviders":[],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .false.\n start_irr = 0\n last_irr = 0\n ldisp = .true.\n fildyn = 'dyn0'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_init_qpoints.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .true.\n ldisp = .true.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid_restart.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18\n ldisp = .true.\n {% raw -%}\n start_q = {{MAP_DATA.qpoint}}\n last_q = {{MAP_DATA.qpoint}}\n start_irr = {{MAP_DATA.irr}}\n last_irr= {{MAP_DATA.irr}}\n {%- endraw %}\n recover = .true.\n fildyn = 'dyn'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_SCRATCH_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_single_irr_qpt.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 0\n/\n&PLOT\n iflag = 3\n output_format = 5\n fileout 
='density.xsf'\n/\n\n","name":"pp_density.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 11\n/\n","name":"pp_electrostatic_potential.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&PROJWFC\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n deltaE = 0.05\n/\n","name":"projwfc.in","contextProviders":[],"applicationName":"espresso","executableName":"projwfc.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n input_dft = 'hse',\n {% for d in qgrid.dimensions -%}\n nqx{{loop.index}} = {{d}}\n {% endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal\n{{ '{{' }} {{ explicitKPath.length }} {% raw %} + KPOINTS|length }} {% endraw %}\n{%- raw %}\n{% for point in KPOINTS -%}\n {% for d in point.coordinates %}{{ \"%14.9f\"|format(d) }} {% endfor -%}{{ point.weight }}\n{% endfor %}\n{% endraw -%}\n{% for point in explicitKPath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}0.0000001\n{% endfor 
%}\n","name":"pw_scf_bands_hse.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPathFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw 
%}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% raw %}{{PARAMETER | default('1')}} {{PARAMETER | default('1')}} {{PARAMETER | default('1')}} 0 0 0{% endraw %}\n","name":"pw_scf_kpt_conv.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'nscf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n{%- if subworkflowContext.NO_SYMMETRY_NO_INVERSION %}\n nosym = .true.\n noinv = .true.\n{%- endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_nscf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n nstep = 50\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'vc-relax'\n title = ''\n verbosity = 
'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_vc_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'bands'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal_b\n{{kpath.length}}\n{% for point in kpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"pw_bands.in","contextProviders":[{"name":"KPathFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n ecutfock = 100\n occupations = 'smearing'\n degauss = 0.005\n input_dft='hse',\n nqx1 = {% if kgrid.dimensions[0]%2 == 0 %}{{kgrid.dimensions[0]/2}}{% else %}{{(kgrid.dimensions[0]+1)/2}}{% endif %}, nqx2 = {% if kgrid.dimensions[1]%2 == 0 %}{{kgrid.dimensions[1]/2}}{% else %}{{(kgrid.dimensions[1]+1)/2}}{% endif %}, nqx3 = {% if 
kgrid.dimensions[2]%2 == 0 %}{{kgrid.dimensions[2]/2}}{% else %}{{(kgrid.dimensions[2]+1)/2}}{% endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{% if d%2 == 0 %}{{d}} {% else %}{{d+1}} {% endif %}{% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf_hse.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n","name":"pw_scf_dft_u.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ 
row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n{% for row in hubbard_v -%}\nV {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.atomicSpecies2 }}-{{ row.atomicOrbital2 }} {{ row.siteIndex }} {{ row.siteIndex2 }} {{ row.hubbardVValue }}\n{% endfor -%}\n","name":"pw_scf_dft_v.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"},{"name":"HubbardVContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_j -%}\n{{ row.paramType }} {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.value }}\n{% endfor -%}\n","name":"pw_scf_dft_j.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardJContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n lda_plus_u = .true.\n lda_plus_u_kind = 0\n U_projection_type = 'ortho-atomic'\n {%- for row in hubbard_legacy %}\n Hubbard_U({{ row.atomicSpeciesIndex }}) = {{ row.hubbardUValue }}\n {%- endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor 
%}\n","name":"pw_scf_dft_u_legacy.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardContextManagerLegacy"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&INPUT\n fildyn = 'dyn'\n zasr = 'simple'\n flfrc = 'force_constants.fc'\n/\n","name":"q2r.in","contextProviders":[],"applicationName":"espresso","executableName":"q2r.x"},{"content":"&inputhp\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {%- for d in qgrid.dimensions %}\n nq{{ loop.index }} = {{ d }}\n {%- endfor %}\n/\n","name":"hp.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"hp.x"},{"content":"&inputpp\n calculation = \"eps\"\n prefix = \"__prefix__\"\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n\n&energy_grid\n smeartype = \"gauss\"\n intersmear = 0.2\n intrasmear = 0.0\n wmin = 0.0\n wmax = 30.0\n nw = 500\n shift = 0.0\n/\n\n","name":"epsilon.in","contextProviders":[],"applicationName":"espresso","executableName":"epsilon.x"},{"content":"# ------------------------------------------------------------------------------- #\n# #\n# Example JupyterLab requirements #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. A Python virtual environment is created #\n# - in /scratch/$USER/$JOB_ID (for build: 'Default') #\n# - in /export/share/python/ (for build: 'with-pre-installed-packages') #\n# 3. This list is used to populate a Python virtual environment #\n# 4. JupyterLab is started #\n# #\n# For more information visit: #\n# - https://jupyterlab.readthedocs.io/en/stable/index.html #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Note: With the JupyterLab build 'with-pre-installed-packages', packages #\n# cannot be added during the notebook runtime. #\n# #\n# ------------------------------------------------------------------------------- #\n\njupyterlab==3.0.3\nnotebook>=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. 
#\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. #\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. 
The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n 
ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. #\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the classification problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When the workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(predictions_range)),\n max(max(target_range), max(predictions_range)))\n plt.xlim(limits[0], limits[1])\n plt.ylim(limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into is spanned by the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train and test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR <=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. 
#\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. #\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. 
The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n 
ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. #\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(target_range)),\n max(max(predictions_range), max(predictions_range)))\n plt.xlim = (limits[0], limits[1])\n plt.ylim = (limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train an test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR < Date: Tue, 9 Jan 2024 23:18:08 +0800 Subject: [PATCH 12/20] chore(SOF-7194): rename structure.lmp -> system.lmp --- assets/deepmd/in.lammps | 4 ++-- assets/deepmd/{structure.lmp => system.lmp} | 0 executables/deepmd/dp_lmp.yml | 2 +- src/js/data/templates.js | 2 +- src/js/data/tree.js | 2 +- templates/deepmd/dp_lmp.yml | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) rename assets/deepmd/{structure.lmp => system.lmp} (100%) diff --git a/assets/deepmd/in.lammps b/assets/deepmd/in.lammps index 01aa2ce2..c51c63df 100644 --- a/assets/deepmd/in.lammps +++ b/assets/deepmd/in.lammps @@ -7,7 +7,7 @@ atom_style atomic neighbor 2.0 bin neigh_modify every 10 delay 0 check no -read_data structure.lmp +read_data system.lmp mass 1 16 mass 2 2 @@ -20,6 +20,6 @@ fix 1 all nvt temp 330.0 330.0 0.5 timestep 0.0005 thermo_style custom step pe ke etotal temp press vol thermo 100 -dump 1 all custom 100 structure.dump id type x y z +dump 1 all custom 100 system.dump id type x y z run 100 diff --git a/assets/deepmd/structure.lmp b/assets/deepmd/system.lmp similarity index 100% rename from assets/deepmd/structure.lmp rename to assets/deepmd/system.lmp diff --git a/executables/deepmd/dp_lmp.yml b/executables/deepmd/dp_lmp.yml index 1a2e71ca..3c7e5909 100644 --- a/executables/deepmd/dp_lmp.yml +++ b/executables/deepmd/dp_lmp.yml @@ -6,7 +6,7 @@ flavors: lammps: input: - name: in.lammps - - 
name: structure.lmp + - name: system.lmp results: [] monitors: - standard_output diff --git a/src/js/data/templates.js b/src/js/data/templates.js index 8b0ff686..065f4549 100644 --- a/src/js/data/templates.js +++ b/src/js/data/templates.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {allTemplates: [{"content":"# LAMMPS input, deepmd-kit version. Built from bulk water calculation.\n\nunits metal\nboundary p p p\natom_style atomic\n\nneighbor 2.0 bin\nneigh_modify every 10 delay 0 check no\n\nread_data structure.lmp\nmass 1 16\nmass 2 2\n\npair_style\t deepmd ./graph.pb\npair_coeff * *\n\nvelocity all create 330.0 23456789\n\nfix 1 all nvt temp 330.0 330.0 0.5\ntimestep 0.0005\nthermo_style custom step pe ke etotal temp press vol\nthermo 100\ndump 1 all custom 100 structure.dump id type x y z\n\nrun 100\n","name":"in.lammps","contextProviders":[],"applicationName":"deepmd","executableName":"dp_lmp"},{"content":"# LAMMPS data\n192 atoms\n2 atom types\n0.0 12.44470 xlo xhi\n0.0 12.44470 ylo yhi\n0.0 12.44470 zlo zhi\n0.0 0.0 0.0 xy xz yz\n\nAtoms # metal\n\n1\t1\t11.83 2.56 2.18\n2\t1\t5.36 6 1.81\n3\t1\t12.17 7.34 2.71\n4\t1\t3.38 4.28 4.84\n5\t1\t4.54 11.73 11.9\n6\t1\t7.34 0.18 2.18\n7\t1\t5.73 9.9 10.31\n8\t1\t5.32 11.19 1.83\n9\t1\t9.39 11.14 1.1\n10\t1\t12.06 0.64 8.92\n11\t1\t5.01 7.72 11.74\n12\t1\t3.64 3.19 1.08\n13\t1\t11.88 10.9 4.7\n14\t1\t3.88 8.05 2.51\n15\t1\t9.82 9.16 10.18\n16\t1\t11 5.31 5.17\n17\t1\t0.97 4.12 0.84\n18\t1\t1.24 5.99 4.76\n19\t1\t10.1 9.09 3.32\n20\t1\t8.39 5.72 10.39\n21\t1\t8.06 4.72 1.3\n22\t1\t3.23 3.34 8.26\n23\t1\t5.02 6.21 7.29\n24\t1\t3.94 10.55 3.95\n25\t1\t6.25 4.56 4.21\n26\t1\t12.18 2.35 11.11\n27\t1\t6.76 8.57 4.07\n28\t1\t5.87 5.84 9.79\n29\t1\t2.18 6.91 6.99\n30\t1\t10.89 7.69 6.78\n31\t1\t8.03 4.28 6.57\n32\t1\t5.29 1.92 11.56\n33\t1\t1.53 12.16 3.45\n34\t1\t2.04 5.49 9.71\n35\t1\t6.66 9.05 1.21\n36\t1\t11.75 6.18 11.01\n37\t1\t6.11 3.07 7.96\n38\t1\t8.22 1.38 9.43\n39\t1\t1.31 2.02 7.09\n40\t1\t9.48 2.11 5.92\n41\t1\t9.33 3.34 10.97\n42\t1\t1.31 8.49 0.81\n43\t1\t11.25 3.48 7.51\n44\t1\t11.7 0.33 0.6\n45\t1\t7.63 2.04 0.28\n46\t1\t8.86 9.52 7.4\n47\t1\t2.01 11.54 6.89\n48\t1\t8.02 11.27 4.66\n49\t1\t5.82 9.18 7.71\n50\t1\t8.11 11.19 11.22\n51\t1\t5.94 0.84 5.91\n52\t1\t1.88 11.66 0.64\n53\t1\t4.66 11.28 6.73\n54\t1\t10.32 6.26 0.89\n55\t1\t2.01 11.07 10.33\n56\t1\t5.36 2.06 3.14\n57\t1\t8.56 7.59 12.17\n58\t1\t10.51 5.81 8.66\n59\t1\t2.37 6.23 12.1\n60\t1\t0.3 8.77 10.05\n61\t1\t9.47 12.13 7.57\n62\t1\t1.41 2.32 4.17\n63\t1\t9.69 3.69 3.56\n64\t1\t0.75 9.22 7.39\n65\t2\t11.09 2.87 2.74\n66\t2\t5.51 5.51 2.6\n67\t2\t0.24 6.8 3.29\n68\t2\t4.28 4.19 4.46\n69\t2\t3.63 11.56 11.76\n70\t2\t6.53 12.07 1.99\n71\t2\t5.37 9.14 10.85\n72\t2\t5.74 10.31 1.64\n73\t2\t8.71 11.07 0.41\n74\t2\t0.27 1.04 8.27\n75\t2\t5.46 7.08 11.15\n76\t2\t3.84 4.17 1.06\n77\t2\t11.55 10.19 4.15\n78\t2\t3 8.17 2.14\n79\t2\t9.53 10.04 10.56\n80\t2\t11.18 4.84 6.05\n81\t2\t0.68 3.76 12.44\n82\t2\t0.43 5.51 4.88\n83\t2\t9.51 9.48 3.94\n84\t2\t9.08 5.89 9.68\n85\t2\t7.92 3.83 0.8\n86\t2\t4.23 3.22 8.21\n87\t2\t5.27 5.9 8.19\n88\t2\t4.44 10.9 3.14\n89\t2\t6.93 4.56 4.89\n90\t2\t11.88 1.61 11.75\n91\t2\t6.79 8.54 3.09\n92\t2\t5.62 4.99 10.13\n93\t2\t3.09 6.66 6.98\n94\t2\t11.73 8.22 6.86\n95\t2\t8.4 3.45 6.18\n96\t2\t4.64 2.37 12.16\n97\t2\t0.74 11.69 3.86\n98\t2\t1.1 5.3 9.72\n99\t2\t7.54 9.11 0.83\n100\t2\t11.36 6.2 11.9\n101\t2\t5.85 2.21 7.53\n102\t2\t8.81 2.01 9.89\n103\t2\t2.17 2.43 7.46\n104\t2\t10.1 2.53 6.55\n105\t2\t9.07 4.29 10.91\n106\t2\t0.64 8.13 
1.45\n107\t2\t11.12 4.2 8.14\n108\t2\t11.03 12.03 0.58\n109\t2\t6.83 1.84 12.19\n110\t2\t9.13 10.45 7.25\n111\t2\t1.76 12.44 6.8\n112\t2\t7.37 11.88 5.08\n113\t2\t5.59 8.29 7.44\n114\t2\t8.14 12.12 10.86\n115\t2\t5.48 0.06 6.32\n116\t2\t0.99 11.99 0.41\n117\t2\t4.54 10.95 5.79\n118\t2\t10.94 6.29 1.65\n119\t2\t2.52 10.55 9.76\n120\t2\t4.74 2.41 2.42\n121\t2\t8.84 8.27 11.56\n122\t2\t10.89 6.06 9.58\n123\t2\t2.15 5.99 11.18\n124\t2\t0.49 9.06 9.13\n125\t2\t8.69 12.34 8.15\n126\t2\t1.76 1.44 4.07\n127\t2\t10.05 4.44 4.1\n128\t2\t1.44 8.49 7.32\n129\t2\t12.25 3.32 1.68\n130\t2\t6.27 6.22 1.56\n131\t2\t11.5 7.85 3.14\n132\t2\t2.96 3.44 4.61\n133\t2\t5.04 11.17 11.3\n134\t2\t7.6 12.39 3.03\n135\t2\t5.94 9.68 9.37\n136\t2\t4.93 11.46 0.96\n137\t2\t8.92 11.68 1.69\n138\t2\t12.07 1.36 9.54\n139\t2\t4.17 7.28 11.8\n140\t2\t2.67 3.09 1.05\n141\t2\t11.89 10.59 5.56\n142\t2\t4.27 7.23 2.16\n143\t2\t10.8 9.22 10.18\n144\t2\t10.68 6.2 5.44\n145\t2\t1.51 4.9 0.53\n146\t2\t1.94 5.36 4.61\n147\t2\t10.02 9.64 2.47\n148\t2\t8.41 6.43 11.04\n149\t2\t8.58 4.29 2.01\n150\t2\t3.12 4.28 8.41\n151\t2\t5.12 5.54 6.6\n152\t2\t3.97 9.58 3.89\n153\t2\t6.17 3.63 3.9\n154\t2\t11.4 2.92 10.99\n155\t2\t5.96 8.02 4.26\n156\t2\t6.81 5.9 9.83\n157\t2\t1.83 6.59 6.16\n158\t2\t10.19 8.36 6.88\n159\t2\t8.74 4.67 7.12\n160\t2\t4.84 1.08 11.38\n161\t2\t2.38 11.71 3.7\n162\t2\t2.02 6.1 8.92\n163\t2\t6.05 8.48 0.61\n164\t2\t12.12 7.05 10.81\n165\t2\t6.81 3.46 7.31\n166\t2\t7.52 1.99 9.1\n167\t2\t1.25 2.26 6.18\n168\t2\t9.31 1.19 6.35\n169\t2\t8.84 2.9 11.69\n170\t2\t1.39 9.44 0.85\n171\t2\t12.13 3.22 7.5\n172\t2\t11.38 0.95 1.34\n173\t2\t7.43 1.82 1.24\n174\t2\t9.15 9.43 8.28\n175\t2\t2.97 11.66 6.88\n176\t2\t7.5 10.43 4.47\n177\t2\t6.69 9.4 7.47\n178\t2\t7.29 10.77 10.93\n179\t2\t5.45 1.07 5.12\n180\t2\t1.92 11.57 1.62\n181\t2\t5.05 10.55 7.26\n182\t2\t9.73 5.52 1.11\n183\t2\t1.73 11.85 9.82\n184\t2\t6.02 1.57 2.67\n185\t2\t9.34 7.2 0.21\n186\t2\t10.71 6.56 7.97\n187\t2\t1.86 7.03 12.37\n188\t2\t1.08 9.16 10.59\n189\t2\t10.19 12.39 8.15\n190\t2\t0.92 2.56 3.28\n191\t2\t9.34 3.01 4.17\n192\t2\t1.11 10.09 7.21\n","name":"structure.lmp","contextProviders":[],"applicationName":"deepmd","executableName":"dp_lmp"},{"content":"import dpdata\nimport numpy as np\n\n# https://docs.deepmodeling.com/projects/dpdata/en/master/formats/QECPTrajFormat.html\ndata = dpdata.LabeledSystem(\"cp\", fmt=\"qe/cp/traj\")\nprint(\"Dataset contains total {0} frames\".format(len(data)))\n\n# randomly choose 20% index for validation_data\nsize = len(data)\nsize_validation = round(size * 0.2)\n\nindex_validation = np.random.choice(size, size=size_validation, replace=False)\nindex_training = list(set(range(size)) - set(index_validation))\n\ndata_training = data.sub_system(index_training)\ndata_validation = data.sub_system(index_validation)\n\nprint(\"Using {0} frames as training set\".format(len(data_training)))\nprint(\"Using {0} frames as validation set\".format(len(data_validation)))\n\n# save training and validation sets\ndata_training.to_deepmd_npy(\"./training\")\ndata_validation.to_deepmd_npy(\"./validation\")\n","name":"qe_cp_to_deepmd.py","contextProviders":[],"applicationName":"deepmd","executableName":"dp_prepare"},{"content":"{\n \"model\": {\n \"type_map\": [\n \"O\",\n \"H\"\n ],\n \"descriptor\": {\n \"type\": \"se_e2_r\",\n \"sel\": [\n 23,\n 46\n ],\n \"rcut_smth\": 0.50,\n \"rcut\": 5.00,\n \"neuron\": [\n 5,\n 5,\n 5\n ],\n \"resnet_dt\": false,\n \"seed\": 1\n },\n \"fitting_net\": {\n \"neuron\": [\n 60,\n 60,\n 60\n ],\n \"resnet_dt\": 
true,\n \"seed\": 1\n }\n },\n \"learning_rate\": {\n \"type\": \"exp\",\n \"decay_steps\": 1000,\n \"start_lr\": 0.005,\n \"stop_lr\": 3.51e-6\n },\n \"loss\": {\n \"start_pref_e\": 0.02,\n \"limit_pref_e\": 1,\n \"start_pref_f\": 1000,\n \"limit_pref_f\": 1,\n \"start_pref_v\": 0,\n \"limit_pref_v\": 0\n },\n \"training\": {\n \"training_data\": {\n \"systems\": [\n \"./training/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"validation_data\": {\n \"systems\": [\n \"./validation/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"numb_steps\": 301,\n \"seed\": 1,\n \"disp_file\": \"lcurve.out\",\n \"disp_freq\": 10,\n \"numb_test\": 1,\n \"save_freq\": 100\n }\n}\n","name":"dp_train_se_e2_r.json","contextProviders":[],"applicationName":"deepmd","executableName":"dp_train"},{"content":"1\npp.dat\n1.0\n3000\n3\n3.0000\n","name":"average.in","contextProviders":[],"applicationName":"espresso","executableName":"average.x"},{"content":"&BANDS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n filband = {% raw %}'{{ JOB_WORK_DIR }}/bands.dat'{% endraw %}\n no_overlap = .true.\n/\n\n","name":"bands.in","contextProviders":[],"applicationName":"espresso","executableName":"bands.x"},{"content":"&DOS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n/\n\n","name":"dos.in","contextProviders":[],"applicationName":"espresso","executableName":"dos.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"dynmat_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"dynmat.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! truncation (used for both correlation and exchange)\n truncation = '2d'\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of the Coulomb solver\n thres_coul = 1.0d-2\n\n ! configuration of W in the convolution\n model_coul = 'godby-needs'\n max_freq_coul = 120\n num_freq_coul = 35\n\n ! configuration of the Green solver\n thres_green = 1.0d-3\n max_iter_green = 300\n\n ! configuration for the correlation self energy\n ecut_corr = 5.0\n max_freq_corr = 100.0\n num_freq_corr = 11\n\n ! configuration for the exchange self energy\n ecut_exch = 20.0\n\n ! configuration for the output\n eta = 0.1\n min_freq_wind = -30.0\n max_freq_wind = 30.0\n num_freq_wind = 601\n/\n\n&gw_output\n/\n\nFREQUENCIES\n2\n 0.0 0.0\n 0.0 10.0\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_plasmon_pole.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! 
the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of W in the convolution\n max_freq_coul = 200\n num_freq_coul = 51\n\n ! configuration for the correlation self energy\n ecut_corr = 6.0\n\n ! configuration for the exchange self energy\n ecut_exch = 15.0\n/\n\n&gw_output\n/\n\nFREQUENCIES\n35\n 0.0 0.0\n 0.0 0.3\n 0.0 0.9\n 0.0 1.8\n 0.0 3.0\n 0.0 4.5\n 0.0 6.3\n 0.0 8.4\n 0.0 10.8\n 0.0 13.5\n 0.0 16.5\n 0.0 19.8\n 0.0 23.4\n 0.0 27.3\n 0.0 31.5\n 0.0 36.0\n 0.0 40.8\n 0.0 45.9\n 0.0 51.3\n 0.0 57.0\n 0.0 63.0\n 0.0 69.3\n 0.0 75.9\n 0.0 82.8\n 0.0 90.0\n 0.0 97.5\n 0.0 105.3\n 0.0 113.4\n 0.0 121.8\n 0.0 130.5\n 0.0 139.5\n 0.0 148.8\n 0.0 158.4\n 0.0 168.3\n 0.0 178.5\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_full_frequency.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"matdyn_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc ='force_constants.fc'\n flfrq ='frequencies.freq'\n flvec ='normal_modes.out'\n q_in_band_form = .true.\n /\n{{ipath.length}}\n{% for point in ipath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"matdyn_path.in","contextProviders":[{"name":"IPathFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"BEGIN\nBEGIN_PATH_INPUT\n&PATH\n restart_mode = 'from_scratch'\n string_method = 'neb',\n nstep_path = 50,\n ds = 2.D0,\n opt_scheme = \"broyden\",\n num_of_images = {{ 2 + (input.INTERMEDIATE_IMAGES.length || neb.nImages) }},\n k_max = 0.3D0,\n k_min = 0.2D0,\n CI_scheme = \"auto\",\n path_thr = 0.1D0,\n/\nEND_PATH_INPUT\nBEGIN_ENGINE_INPUT\n&CONTROL\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.03\n nspin = 2\n starting_magnetization = 0.5\n/\n&ELECTRONS\n conv_thr = 1.D-8\n mixing_beta = 0.3\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nBEGIN_POSITIONS\nFIRST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.FIRST_IMAGE }}\n{%- for IMAGE in input.INTERMEDIATE_IMAGES %}\nINTERMEDIATE_IMAGE\nATOMIC_POSITIONS crystal\n{{ IMAGE }}\n{%- endfor %}\nLAST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.LAST_IMAGE }}\nEND_POSITIONS\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor 
%}\nEND_ENGINE_INPUT\nEND\n","name":"neb.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"NEBFormDataManager"},{"name":"QENEBInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"neb.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n ldisp = .true.\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n{% for point in qpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor %}\n{% endfor %}\n","name":"ph_path.in","contextProviders":[{"name":"QPathFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n0 0 0\n","name":"ph_gamma.in","contextProviders":[],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .false.\n start_irr = 0\n last_irr = 0\n ldisp = .true.\n fildyn = 'dyn0'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_init_qpoints.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .true.\n ldisp = .true.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid_restart.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18\n ldisp = .true.\n {% raw -%}\n start_q = {{MAP_DATA.qpoint}}\n last_q = {{MAP_DATA.qpoint}}\n start_irr = {{MAP_DATA.irr}}\n last_irr= {{MAP_DATA.irr}}\n {%- endraw %}\n recover = .true.\n fildyn = 'dyn'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_SCRATCH_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_single_irr_qpt.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 0\n/\n&PLOT\n iflag = 3\n output_format = 5\n fileout ='density.xsf'\n/\n\n","name":"pp_density.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 11\n/\n","name":"pp_electrostatic_potential.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&PROJWFC\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n deltaE = 0.05\n/\n","name":"projwfc.in","contextProviders":[],"applicationName":"espresso","executableName":"projwfc.x"},{"content":"{% if 
subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n input_dft = 'hse',\n {% for d in qgrid.dimensions -%}\n nqx{{loop.index}} = {{d}}\n {% endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal\n{{ '{{' }} {{ explicitKPath.length }} {% raw %} + KPOINTS|length }} {% endraw %}\n{%- raw %}\n{% for point in KPOINTS -%}\n {% for d in point.coordinates %}{{ \"%14.9f\"|format(d) }} {% endfor -%}{{ point.weight }}\n{% endfor %}\n{% endraw -%}\n{% for point in explicitKPath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}0.0000001\n{% endfor %}\n","name":"pw_scf_bands_hse.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPathFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n 
assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% raw %}{{PARAMETER | default('1')}} {{PARAMETER | default('1')}} {{PARAMETER | default('1')}} 0 0 0{% endraw 
%}\n","name":"pw_scf_kpt_conv.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'nscf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n{%- if subworkflowContext.NO_SYMMETRY_NO_INVERSION %}\n nosym = .true.\n noinv = .true.\n{%- endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_nscf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n nstep = 50\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'vc-relax'\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ 
input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_vc_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'bands'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal_b\n{{kpath.length}}\n{% for point in kpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"pw_bands.in","contextProviders":[{"name":"KPathFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n ecutfock = 100\n occupations = 'smearing'\n degauss = 0.005\n input_dft='hse',\n nqx1 = {% if kgrid.dimensions[0]%2 == 0 %}{{kgrid.dimensions[0]/2}}{% else %}{{(kgrid.dimensions[0]+1)/2}}{% endif %}, nqx2 = {% if kgrid.dimensions[1]%2 == 0 %}{{kgrid.dimensions[1]/2}}{% else %}{{(kgrid.dimensions[1]+1)/2}}{% endif %}, nqx3 = {% if kgrid.dimensions[2]%2 == 0 %}{{kgrid.dimensions[2]/2}}{% else %}{{(kgrid.dimensions[2]+1)/2}}{% endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{% if d%2 == 0 %}{{d}} {% else %}{{d+1}} {% endif %}{% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor 
%}\n","name":"pw_scf_hse.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n","name":"pw_scf_dft_u.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n{% for row in hubbard_v -%}\nV {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.atomicSpecies2 }}-{{ row.atomicOrbital2 }} {{ row.siteIndex }} {{ row.siteIndex2 }} {{ row.hubbardVValue }}\n{% endfor -%}\n","name":"pw_scf_dft_v.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"},{"name":"HubbardVContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if 
subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_j -%}\n{{ row.paramType }} {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.value }}\n{% endfor -%}\n","name":"pw_scf_dft_j.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardJContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n lda_plus_u = .true.\n lda_plus_u_kind = 0\n U_projection_type = 'ortho-atomic'\n {%- for row in hubbard_legacy %}\n Hubbard_U({{ row.atomicSpeciesIndex }}) = {{ row.hubbardUValue }}\n {%- endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf_dft_u_legacy.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardContextManagerLegacy"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&INPUT\n fildyn = 'dyn'\n zasr = 'simple'\n flfrc = 'force_constants.fc'\n/\n","name":"q2r.in","contextProviders":[],"applicationName":"espresso","executableName":"q2r.x"},{"content":"&inputhp\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {%- for d in qgrid.dimensions %}\n nq{{ loop.index }} = {{ d }}\n {%- endfor 
%}\n/\n","name":"hp.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"hp.x"},{"content":"&inputpp\n calculation = \"eps\"\n prefix = \"__prefix__\"\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n\n&energy_grid\n smeartype = \"gauss\"\n intersmear = 0.2\n intrasmear = 0.0\n wmin = 0.0\n wmax = 30.0\n nw = 500\n shift = 0.0\n/\n\n","name":"epsilon.in","contextProviders":[],"applicationName":"espresso","executableName":"epsilon.x"},{"content":"# ------------------------------------------------------------------------------- #\n# #\n# Example JupyterLab requirements #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. A Python virtual environment is created #\n# - in /scratch/$USER/$JOB_ID (for build: 'Default') #\n# - in /export/share/python/ (for build: 'with-pre-installed-packages') #\n# 3. This list is used to populate a Python virtual environment #\n# 4. JupyterLab is started #\n# #\n# For more information visit: #\n# - https://jupyterlab.readthedocs.io/en/stable/index.html #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Note: With the JupyterLab build 'with-pre-installed-packages', packages #\n# cannot be added during the notebook runtime. #\n# #\n# ------------------------------------------------------------------------------- #\n\njupyterlab==3.0.3\nnotebook>=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 
'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. 
#\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(object_here, \"name-for-the-object\")\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n # Combine the target and prediction ranges so both axes share the same limits\n limits = (min(min(target_range), min(predictions_range)),\n max(max(target_range), max(predictions_range)))\n plt.xlim(limits[0], limits[1])\n plt.ylim(limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into is spanned by the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train and test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plotted in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principal Component 1\"\n ylabel = \"Principal Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR <=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. 
#\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. #\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. 
The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n 
ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. #\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(target_range)),\n max(max(predictions_range), max(predictions_range)))\n plt.xlim = (limits[0], limits[1])\n plt.ylim = (limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train an test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR < Date: Thu, 18 Jan 2024 19:48:09 +0800 Subject: [PATCH 13/20] SOF-7194: add way to convert qe structure to lmp structure + rename executables dp_prepare -> python, dp_lmp -> lmp etc --- assets/deepmd/qe_to_lmp_structure.j2.py | 10 + assets/deepmd/system.lmp | 202 ------------------ executables/deepmd/{dp_train.yml => dp.yml} | 2 +- executables/deepmd/{dp_lmp.yml => lmp.yml} | 1 - .../deepmd/{dp_prepare.yml => python.yml} | 12 +- executables/tree.yml | 6 +- src/js/data/templates.js | 2 +- src/js/data/tree.js | 2 +- .../deepmd/dp_create_lmp_structure.py.yml | 5 + templates/deepmd/dp_lmp.yml | 8 +- templates/deepmd/dp_prepare.py.yml | 2 +- templates/deepmd/dp_train.yml | 2 +- templates/templates.yml | 1 + 13 files changed, 36 insertions(+), 219 deletions(-) create mode 100644 assets/deepmd/qe_to_lmp_structure.j2.py delete mode 100644 assets/deepmd/system.lmp rename executables/deepmd/{dp_train.yml => dp.yml} (71%) rename executables/deepmd/{dp_lmp.yml => lmp.yml} (91%) rename executables/deepmd/{dp_prepare.yml => python.yml} (50%) create mode 100644 templates/deepmd/dp_create_lmp_structure.py.yml diff --git a/assets/deepmd/qe_to_lmp_structure.j2.py b/assets/deepmd/qe_to_lmp_structure.j2.py new file mode 100644 index 00000000..98841825 --- /dev/null +++ b/assets/deepmd/qe_to_lmp_structure.j2.py @@ -0,0 +1,10 @@ +import dpdata + +# load cp data files +# 
https://docs.deepmodeling.com/projects/dpdata/en/master/formats/QECPTrajFormat.html +system = dpdata.LabeledSystem("cp", fmt="qe/cp/traj") + +# convert dpdata to lammps format +# below procedure will convert input QE structure to lammps format, user may +# want to generate supercell or other complex structure for lammps calculation +system.to_lmp("system.lmp") diff --git a/assets/deepmd/system.lmp b/assets/deepmd/system.lmp deleted file mode 100644 index 8bd1def3..00000000 --- a/assets/deepmd/system.lmp +++ /dev/null @@ -1,202 +0,0 @@ -# LAMMPS data -192 atoms -2 atom types -0.0 12.44470 xlo xhi -0.0 12.44470 ylo yhi -0.0 12.44470 zlo zhi -0.0 0.0 0.0 xy xz yz - -Atoms # metal - -1 1 11.83 2.56 2.18 -2 1 5.36 6 1.81 -3 1 12.17 7.34 2.71 -4 1 3.38 4.28 4.84 -5 1 4.54 11.73 11.9 -6 1 7.34 0.18 2.18 -7 1 5.73 9.9 10.31 -8 1 5.32 11.19 1.83 -9 1 9.39 11.14 1.1 -10 1 12.06 0.64 8.92 -11 1 5.01 7.72 11.74 -12 1 3.64 3.19 1.08 -13 1 11.88 10.9 4.7 -14 1 3.88 8.05 2.51 -15 1 9.82 9.16 10.18 -16 1 11 5.31 5.17 -17 1 0.97 4.12 0.84 -18 1 1.24 5.99 4.76 -19 1 10.1 9.09 3.32 -20 1 8.39 5.72 10.39 -21 1 8.06 4.72 1.3 -22 1 3.23 3.34 8.26 -23 1 5.02 6.21 7.29 -24 1 3.94 10.55 3.95 -25 1 6.25 4.56 4.21 -26 1 12.18 2.35 11.11 -27 1 6.76 8.57 4.07 -28 1 5.87 5.84 9.79 -29 1 2.18 6.91 6.99 -30 1 10.89 7.69 6.78 -31 1 8.03 4.28 6.57 -32 1 5.29 1.92 11.56 -33 1 1.53 12.16 3.45 -34 1 2.04 5.49 9.71 -35 1 6.66 9.05 1.21 -36 1 11.75 6.18 11.01 -37 1 6.11 3.07 7.96 -38 1 8.22 1.38 9.43 -39 1 1.31 2.02 7.09 -40 1 9.48 2.11 5.92 -41 1 9.33 3.34 10.97 -42 1 1.31 8.49 0.81 -43 1 11.25 3.48 7.51 -44 1 11.7 0.33 0.6 -45 1 7.63 2.04 0.28 -46 1 8.86 9.52 7.4 -47 1 2.01 11.54 6.89 -48 1 8.02 11.27 4.66 -49 1 5.82 9.18 7.71 -50 1 8.11 11.19 11.22 -51 1 5.94 0.84 5.91 -52 1 1.88 11.66 0.64 -53 1 4.66 11.28 6.73 -54 1 10.32 6.26 0.89 -55 1 2.01 11.07 10.33 -56 1 5.36 2.06 3.14 -57 1 8.56 7.59 12.17 -58 1 10.51 5.81 8.66 -59 1 2.37 6.23 12.1 -60 1 0.3 8.77 10.05 -61 1 9.47 12.13 7.57 -62 1 1.41 2.32 4.17 -63 1 9.69 3.69 3.56 -64 1 0.75 9.22 7.39 -65 2 11.09 2.87 2.74 -66 2 5.51 5.51 2.6 -67 2 0.24 6.8 3.29 -68 2 4.28 4.19 4.46 -69 2 3.63 11.56 11.76 -70 2 6.53 12.07 1.99 -71 2 5.37 9.14 10.85 -72 2 5.74 10.31 1.64 -73 2 8.71 11.07 0.41 -74 2 0.27 1.04 8.27 -75 2 5.46 7.08 11.15 -76 2 3.84 4.17 1.06 -77 2 11.55 10.19 4.15 -78 2 3 8.17 2.14 -79 2 9.53 10.04 10.56 -80 2 11.18 4.84 6.05 -81 2 0.68 3.76 12.44 -82 2 0.43 5.51 4.88 -83 2 9.51 9.48 3.94 -84 2 9.08 5.89 9.68 -85 2 7.92 3.83 0.8 -86 2 4.23 3.22 8.21 -87 2 5.27 5.9 8.19 -88 2 4.44 10.9 3.14 -89 2 6.93 4.56 4.89 -90 2 11.88 1.61 11.75 -91 2 6.79 8.54 3.09 -92 2 5.62 4.99 10.13 -93 2 3.09 6.66 6.98 -94 2 11.73 8.22 6.86 -95 2 8.4 3.45 6.18 -96 2 4.64 2.37 12.16 -97 2 0.74 11.69 3.86 -98 2 1.1 5.3 9.72 -99 2 7.54 9.11 0.83 -100 2 11.36 6.2 11.9 -101 2 5.85 2.21 7.53 -102 2 8.81 2.01 9.89 -103 2 2.17 2.43 7.46 -104 2 10.1 2.53 6.55 -105 2 9.07 4.29 10.91 -106 2 0.64 8.13 1.45 -107 2 11.12 4.2 8.14 -108 2 11.03 12.03 0.58 -109 2 6.83 1.84 12.19 -110 2 9.13 10.45 7.25 -111 2 1.76 12.44 6.8 -112 2 7.37 11.88 5.08 -113 2 5.59 8.29 7.44 -114 2 8.14 12.12 10.86 -115 2 5.48 0.06 6.32 -116 2 0.99 11.99 0.41 -117 2 4.54 10.95 5.79 -118 2 10.94 6.29 1.65 -119 2 2.52 10.55 9.76 -120 2 4.74 2.41 2.42 -121 2 8.84 8.27 11.56 -122 2 10.89 6.06 9.58 -123 2 2.15 5.99 11.18 -124 2 0.49 9.06 9.13 -125 2 8.69 12.34 8.15 -126 2 1.76 1.44 4.07 -127 2 10.05 4.44 4.1 -128 2 1.44 8.49 7.32 -129 2 12.25 3.32 1.68 -130 2 6.27 6.22 1.56 -131 2 11.5 7.85 3.14 -132 2 2.96 3.44 4.61 -133 2 5.04 11.17 
11.3 -134 2 7.6 12.39 3.03 -135 2 5.94 9.68 9.37 -136 2 4.93 11.46 0.96 -137 2 8.92 11.68 1.69 -138 2 12.07 1.36 9.54 -139 2 4.17 7.28 11.8 -140 2 2.67 3.09 1.05 -141 2 11.89 10.59 5.56 -142 2 4.27 7.23 2.16 -143 2 10.8 9.22 10.18 -144 2 10.68 6.2 5.44 -145 2 1.51 4.9 0.53 -146 2 1.94 5.36 4.61 -147 2 10.02 9.64 2.47 -148 2 8.41 6.43 11.04 -149 2 8.58 4.29 2.01 -150 2 3.12 4.28 8.41 -151 2 5.12 5.54 6.6 -152 2 3.97 9.58 3.89 -153 2 6.17 3.63 3.9 -154 2 11.4 2.92 10.99 -155 2 5.96 8.02 4.26 -156 2 6.81 5.9 9.83 -157 2 1.83 6.59 6.16 -158 2 10.19 8.36 6.88 -159 2 8.74 4.67 7.12 -160 2 4.84 1.08 11.38 -161 2 2.38 11.71 3.7 -162 2 2.02 6.1 8.92 -163 2 6.05 8.48 0.61 -164 2 12.12 7.05 10.81 -165 2 6.81 3.46 7.31 -166 2 7.52 1.99 9.1 -167 2 1.25 2.26 6.18 -168 2 9.31 1.19 6.35 -169 2 8.84 2.9 11.69 -170 2 1.39 9.44 0.85 -171 2 12.13 3.22 7.5 -172 2 11.38 0.95 1.34 -173 2 7.43 1.82 1.24 -174 2 9.15 9.43 8.28 -175 2 2.97 11.66 6.88 -176 2 7.5 10.43 4.47 -177 2 6.69 9.4 7.47 -178 2 7.29 10.77 10.93 -179 2 5.45 1.07 5.12 -180 2 1.92 11.57 1.62 -181 2 5.05 10.55 7.26 -182 2 9.73 5.52 1.11 -183 2 1.73 11.85 9.82 -184 2 6.02 1.57 2.67 -185 2 9.34 7.2 0.21 -186 2 10.71 6.56 7.97 -187 2 1.86 7.03 12.37 -188 2 1.08 9.16 10.59 -189 2 10.19 12.39 8.15 -190 2 0.92 2.56 3.28 -191 2 9.34 3.01 4.17 -192 2 1.11 10.09 7.21 diff --git a/executables/deepmd/dp_train.yml b/executables/deepmd/dp.yml similarity index 71% rename from executables/deepmd/dp_train.yml rename to executables/deepmd/dp.yml index d2579866..ae20caa8 100644 --- a/executables/deepmd/dp_train.yml +++ b/executables/deepmd/dp.yml @@ -10,4 +10,4 @@ flavors: monitors: - standard_output applicationName: deepmd - executableName: dp_train # dp train dp_train_se_e2_r.json && dp freeze -o graph.pb + executableName: dp # dp train dp_train_se_e2_r.json && dp freeze -o graph.pb diff --git a/executables/deepmd/dp_lmp.yml b/executables/deepmd/lmp.yml similarity index 91% rename from executables/deepmd/dp_lmp.yml rename to executables/deepmd/lmp.yml index 3c7e5909..260c3d20 100644 --- a/executables/deepmd/dp_lmp.yml +++ b/executables/deepmd/lmp.yml @@ -6,7 +6,6 @@ flavors: lammps: input: - name: in.lammps - - name: system.lmp results: [] monitors: - standard_output diff --git a/executables/deepmd/dp_prepare.yml b/executables/deepmd/python.yml similarity index 50% rename from executables/deepmd/dp_prepare.yml rename to executables/deepmd/python.yml index 0fbf91ad..e1b7670b 100644 --- a/executables/deepmd/dp_prepare.yml +++ b/executables/deepmd/python.yml @@ -11,4 +11,14 @@ flavors: monitors: - standard_output applicationName: deepmd - executableName: dp_prepare # python3 qe_cp_to_deepmd.py + executableName: python + + qe_to_lmp_structure: + isDefault: true + input: + - name: qe_to_lmp_structure.py + results: [] + monitors: + - standard_output + applicationName: deepmd + executableName: python diff --git a/executables/tree.yml b/executables/tree.yml index c8b939cc..a587f5ac 100644 --- a/executables/tree.yml +++ b/executables/tree.yml @@ -1,7 +1,7 @@ deepmd: - dp_lmp: !include 'executables/deepmd/dp_lmp.yml' - dp_prepare: !include 'executables/deepmd/dp_prepare.yml' - dp_train: !include 'executables/deepmd/dp_train.yml' + dp: !include 'executables/deepmd/dp.yml' + lmp: !include 'executables/deepmd/lmp.yml' + python: !include 'executables/deepmd/python.yml' espresso: average.x: !include 'executables/espresso/average.x.yml' bands.x: !include 'executables/espresso/bands.x.yml' diff --git a/src/js/data/templates.js b/src/js/data/templates.js index 8c7e595d..92352a97 
100644 --- a/src/js/data/templates.js +++ b/src/js/data/templates.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {allTemplates: [{"content":"# LAMMPS input, deepmd-kit version. Built from bulk water calculation.\n\nunits metal\nboundary p p p\natom_style atomic\n\nneighbor 2.0 bin\nneigh_modify every 10 delay 0 check no\n\nread_data system.lmp\nmass 1 16\nmass 2 2\n\npair_style\t deepmd ./graph.pb\npair_coeff * *\n\nvelocity all create 330.0 23456789\n\nfix 1 all nvt temp 330.0 330.0 0.5\ntimestep 0.0005\nthermo_style custom step pe ke etotal temp press vol\nthermo 100\ndump 1 all custom 100 system.dump id type x y z\n\nrun 100\n","name":"in.lammps","contextProviders":[],"applicationName":"deepmd","executableName":"dp_lmp"},{"content":"# LAMMPS data\n192 atoms\n2 atom types\n0.0 12.44470 xlo xhi\n0.0 12.44470 ylo yhi\n0.0 12.44470 zlo zhi\n0.0 0.0 0.0 xy xz yz\n\nAtoms # metal\n\n1\t1\t11.83 2.56 2.18\n2\t1\t5.36 6 1.81\n3\t1\t12.17 7.34 2.71\n4\t1\t3.38 4.28 4.84\n5\t1\t4.54 11.73 11.9\n6\t1\t7.34 0.18 2.18\n7\t1\t5.73 9.9 10.31\n8\t1\t5.32 11.19 1.83\n9\t1\t9.39 11.14 1.1\n10\t1\t12.06 0.64 8.92\n11\t1\t5.01 7.72 11.74\n12\t1\t3.64 3.19 1.08\n13\t1\t11.88 10.9 4.7\n14\t1\t3.88 8.05 2.51\n15\t1\t9.82 9.16 10.18\n16\t1\t11 5.31 5.17\n17\t1\t0.97 4.12 0.84\n18\t1\t1.24 5.99 4.76\n19\t1\t10.1 9.09 3.32\n20\t1\t8.39 5.72 10.39\n21\t1\t8.06 4.72 1.3\n22\t1\t3.23 3.34 8.26\n23\t1\t5.02 6.21 7.29\n24\t1\t3.94 10.55 3.95\n25\t1\t6.25 4.56 4.21\n26\t1\t12.18 2.35 11.11\n27\t1\t6.76 8.57 4.07\n28\t1\t5.87 5.84 9.79\n29\t1\t2.18 6.91 6.99\n30\t1\t10.89 7.69 6.78\n31\t1\t8.03 4.28 6.57\n32\t1\t5.29 1.92 11.56\n33\t1\t1.53 12.16 3.45\n34\t1\t2.04 5.49 9.71\n35\t1\t6.66 9.05 1.21\n36\t1\t11.75 6.18 11.01\n37\t1\t6.11 3.07 7.96\n38\t1\t8.22 1.38 9.43\n39\t1\t1.31 2.02 7.09\n40\t1\t9.48 2.11 5.92\n41\t1\t9.33 3.34 10.97\n42\t1\t1.31 8.49 0.81\n43\t1\t11.25 3.48 7.51\n44\t1\t11.7 0.33 0.6\n45\t1\t7.63 2.04 0.28\n46\t1\t8.86 9.52 7.4\n47\t1\t2.01 11.54 6.89\n48\t1\t8.02 11.27 4.66\n49\t1\t5.82 9.18 7.71\n50\t1\t8.11 11.19 11.22\n51\t1\t5.94 0.84 5.91\n52\t1\t1.88 11.66 0.64\n53\t1\t4.66 11.28 6.73\n54\t1\t10.32 6.26 0.89\n55\t1\t2.01 11.07 10.33\n56\t1\t5.36 2.06 3.14\n57\t1\t8.56 7.59 12.17\n58\t1\t10.51 5.81 8.66\n59\t1\t2.37 6.23 12.1\n60\t1\t0.3 8.77 10.05\n61\t1\t9.47 12.13 7.57\n62\t1\t1.41 2.32 4.17\n63\t1\t9.69 3.69 3.56\n64\t1\t0.75 9.22 7.39\n65\t2\t11.09 2.87 2.74\n66\t2\t5.51 5.51 2.6\n67\t2\t0.24 6.8 3.29\n68\t2\t4.28 4.19 4.46\n69\t2\t3.63 11.56 11.76\n70\t2\t6.53 12.07 1.99\n71\t2\t5.37 9.14 10.85\n72\t2\t5.74 10.31 1.64\n73\t2\t8.71 11.07 0.41\n74\t2\t0.27 1.04 8.27\n75\t2\t5.46 7.08 11.15\n76\t2\t3.84 4.17 1.06\n77\t2\t11.55 10.19 4.15\n78\t2\t3 8.17 2.14\n79\t2\t9.53 10.04 10.56\n80\t2\t11.18 4.84 6.05\n81\t2\t0.68 3.76 12.44\n82\t2\t0.43 5.51 4.88\n83\t2\t9.51 9.48 3.94\n84\t2\t9.08 5.89 9.68\n85\t2\t7.92 3.83 0.8\n86\t2\t4.23 3.22 8.21\n87\t2\t5.27 5.9 8.19\n88\t2\t4.44 10.9 3.14\n89\t2\t6.93 4.56 4.89\n90\t2\t11.88 1.61 11.75\n91\t2\t6.79 8.54 3.09\n92\t2\t5.62 4.99 10.13\n93\t2\t3.09 6.66 6.98\n94\t2\t11.73 8.22 6.86\n95\t2\t8.4 3.45 6.18\n96\t2\t4.64 2.37 12.16\n97\t2\t0.74 11.69 3.86\n98\t2\t1.1 5.3 9.72\n99\t2\t7.54 9.11 0.83\n100\t2\t11.36 6.2 11.9\n101\t2\t5.85 2.21 7.53\n102\t2\t8.81 2.01 9.89\n103\t2\t2.17 2.43 7.46\n104\t2\t10.1 2.53 6.55\n105\t2\t9.07 4.29 10.91\n106\t2\t0.64 8.13 1.45\n107\t2\t11.12 4.2 8.14\n108\t2\t11.03 12.03 0.58\n109\t2\t6.83 1.84 12.19\n110\t2\t9.13 10.45 7.25\n111\t2\t1.76 12.44 6.8\n112\t2\t7.37 11.88 5.08\n113\t2\t5.59 8.29 
7.44\n114\t2\t8.14 12.12 10.86\n115\t2\t5.48 0.06 6.32\n116\t2\t0.99 11.99 0.41\n117\t2\t4.54 10.95 5.79\n118\t2\t10.94 6.29 1.65\n119\t2\t2.52 10.55 9.76\n120\t2\t4.74 2.41 2.42\n121\t2\t8.84 8.27 11.56\n122\t2\t10.89 6.06 9.58\n123\t2\t2.15 5.99 11.18\n124\t2\t0.49 9.06 9.13\n125\t2\t8.69 12.34 8.15\n126\t2\t1.76 1.44 4.07\n127\t2\t10.05 4.44 4.1\n128\t2\t1.44 8.49 7.32\n129\t2\t12.25 3.32 1.68\n130\t2\t6.27 6.22 1.56\n131\t2\t11.5 7.85 3.14\n132\t2\t2.96 3.44 4.61\n133\t2\t5.04 11.17 11.3\n134\t2\t7.6 12.39 3.03\n135\t2\t5.94 9.68 9.37\n136\t2\t4.93 11.46 0.96\n137\t2\t8.92 11.68 1.69\n138\t2\t12.07 1.36 9.54\n139\t2\t4.17 7.28 11.8\n140\t2\t2.67 3.09 1.05\n141\t2\t11.89 10.59 5.56\n142\t2\t4.27 7.23 2.16\n143\t2\t10.8 9.22 10.18\n144\t2\t10.68 6.2 5.44\n145\t2\t1.51 4.9 0.53\n146\t2\t1.94 5.36 4.61\n147\t2\t10.02 9.64 2.47\n148\t2\t8.41 6.43 11.04\n149\t2\t8.58 4.29 2.01\n150\t2\t3.12 4.28 8.41\n151\t2\t5.12 5.54 6.6\n152\t2\t3.97 9.58 3.89\n153\t2\t6.17 3.63 3.9\n154\t2\t11.4 2.92 10.99\n155\t2\t5.96 8.02 4.26\n156\t2\t6.81 5.9 9.83\n157\t2\t1.83 6.59 6.16\n158\t2\t10.19 8.36 6.88\n159\t2\t8.74 4.67 7.12\n160\t2\t4.84 1.08 11.38\n161\t2\t2.38 11.71 3.7\n162\t2\t2.02 6.1 8.92\n163\t2\t6.05 8.48 0.61\n164\t2\t12.12 7.05 10.81\n165\t2\t6.81 3.46 7.31\n166\t2\t7.52 1.99 9.1\n167\t2\t1.25 2.26 6.18\n168\t2\t9.31 1.19 6.35\n169\t2\t8.84 2.9 11.69\n170\t2\t1.39 9.44 0.85\n171\t2\t12.13 3.22 7.5\n172\t2\t11.38 0.95 1.34\n173\t2\t7.43 1.82 1.24\n174\t2\t9.15 9.43 8.28\n175\t2\t2.97 11.66 6.88\n176\t2\t7.5 10.43 4.47\n177\t2\t6.69 9.4 7.47\n178\t2\t7.29 10.77 10.93\n179\t2\t5.45 1.07 5.12\n180\t2\t1.92 11.57 1.62\n181\t2\t5.05 10.55 7.26\n182\t2\t9.73 5.52 1.11\n183\t2\t1.73 11.85 9.82\n184\t2\t6.02 1.57 2.67\n185\t2\t9.34 7.2 0.21\n186\t2\t10.71 6.56 7.97\n187\t2\t1.86 7.03 12.37\n188\t2\t1.08 9.16 10.59\n189\t2\t10.19 12.39 8.15\n190\t2\t0.92 2.56 3.28\n191\t2\t9.34 3.01 4.17\n192\t2\t1.11 10.09 7.21\n","name":"system.lmp","contextProviders":[],"applicationName":"deepmd","executableName":"dp_lmp"},{"content":"import dpdata\nimport numpy as np\n\n# https://docs.deepmodeling.com/projects/dpdata/en/master/formats/QECPTrajFormat.html\ndata = dpdata.LabeledSystem(\"cp\", fmt=\"qe/cp/traj\")\nprint(\"Dataset contains total {0} frames\".format(len(data)))\n\n# randomly choose 20% index for validation_data\nsize = len(data)\nsize_validation = round(size * 0.2)\n\nindex_validation = np.random.choice(size, size=size_validation, replace=False)\nindex_training = list(set(range(size)) - set(index_validation))\n\ndata_training = data.sub_system(index_training)\ndata_validation = data.sub_system(index_validation)\n\nprint(\"Using {0} frames as training set\".format(len(data_training)))\nprint(\"Using {0} frames as validation set\".format(len(data_validation)))\n\n# save training and validation sets\ndata_training.to_deepmd_npy(\"./training\")\ndata_validation.to_deepmd_npy(\"./validation\")\n","name":"qe_cp_to_deepmd.py","contextProviders":[],"applicationName":"deepmd","executableName":"dp_prepare"},{"content":"{\n \"model\": {\n \"type_map\": [\n \"O\",\n \"H\"\n ],\n \"descriptor\": {\n \"type\": \"se_e2_r\",\n \"sel\": [\n 23,\n 46\n ],\n \"rcut_smth\": 0.50,\n \"rcut\": 5.00,\n \"neuron\": [\n 5,\n 5,\n 5\n ],\n \"resnet_dt\": false,\n \"seed\": 1\n },\n \"fitting_net\": {\n \"neuron\": [\n 60,\n 60,\n 60\n ],\n \"resnet_dt\": true,\n \"seed\": 1\n }\n },\n \"learning_rate\": {\n \"type\": \"exp\",\n \"decay_steps\": 1000,\n \"start_lr\": 0.005,\n \"stop_lr\": 3.51e-6\n },\n \"loss\": {\n 
\"start_pref_e\": 0.02,\n \"limit_pref_e\": 1,\n \"start_pref_f\": 1000,\n \"limit_pref_f\": 1,\n \"start_pref_v\": 0,\n \"limit_pref_v\": 0\n },\n \"training\": {\n \"training_data\": {\n \"systems\": [\n \"./training/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"validation_data\": {\n \"systems\": [\n \"./validation/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"numb_steps\": 301,\n \"seed\": 1,\n \"disp_file\": \"lcurve.out\",\n \"disp_freq\": 10,\n \"numb_test\": 1,\n \"save_freq\": 100\n }\n}\n","name":"dp_train_se_e2_r.json","contextProviders":[],"applicationName":"deepmd","executableName":"dp_train"},{"content":"1\npp.dat\n1.0\n3000\n3\n3.0000\n","name":"average.in","contextProviders":[],"applicationName":"espresso","executableName":"average.x"},{"content":"&BANDS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n filband = {% raw %}'{{ JOB_WORK_DIR }}/bands.dat'{% endraw %}\n no_overlap = .true.\n/\n\n","name":"bands.in","contextProviders":[],"applicationName":"espresso","executableName":"bands.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'cp'\n restart_mode = '{{ input.RESTART_MODE }}'\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}'{% endraw %}\n prefix = 'cp'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n nstep = {{ dynamics.numberOfSteps }}\n iprint = 1\n dt = {{ dynamics.timeStep }}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n nr1b = 20\n nr2b = 20\n nr3b = 20\n/\n&ELECTRONS\n electron_dynamics = 'verlet'\n electron_velocities = 'zero'\n emass = {{ dynamics.electronMass }}\n/\n&IONS\n ion_dynamics = 'verlet'\n ion_velocities = 'zero'\n tempw = {{ dynamics.temperature }}\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\n","name":"cp.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"IonDynamicsContextProvider"}],"applicationName":"espresso","executableName":"cp.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'cp-wf'\n restart_mode = '{{ input.RESTART_MODE }}'\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}'{% endraw %}\n prefix = 'cp_wf'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n nstep = {{ dynamics.numberOfSteps }}\n iprint = 1\n dt = {{ dynamics.timeStep }}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n! TODO: figure out the runtime error when `ecutrho` is set. \n! In the meantime, using Norm-conserving pseudopotentials works.\n! 
ecutrho = {{ cutoffs.density }}\n nr1b = 20\n nr2b = 20\n nr3b = 20\n/\n&ELECTRONS\n electron_dynamics = 'verlet'\n electron_velocities = 'zero'\n emass = {{ dynamics.electronMass }}\n/\n&IONS\n ion_dynamics = 'verlet'\n ion_velocities = 'zero'\n tempw = {{ dynamics.temperature }}\n/\n&CELL\n/\n&WANNIER\n nit = 60,\n calwf = 3,\n tolw = 1.D-6,\n nsteps = 50,\n adapt = .FALSE.\n wfdt = 2.0D0,\n wf_q = 500.D0,\n wf_friction = 0.3d0,\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\n","name":"cp_wf.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"IonDynamicsContextProvider"}],"applicationName":"espresso","executableName":"cp.x"},{"content":"&DOS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n/\n\n","name":"dos.in","contextProviders":[],"applicationName":"espresso","executableName":"dos.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"dynmat_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"dynmat.x"},{"content":"&inputpp\n calculation = \"eps\"\n prefix = \"__prefix__\"\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n\n&energy_grid\n smeartype = \"gauss\"\n intersmear = 0.2\n intrasmear = 0.0\n wmin = 0.0\n wmax = 30.0\n nw = 500\n shift = 0.0\n/\n\n","name":"epsilon.in","contextProviders":[],"applicationName":"espresso","executableName":"epsilon.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! truncation (used for both correlation and exchange)\n truncation = '2d'\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of the Coulomb solver\n thres_coul = 1.0d-2\n\n ! configuration of W in the convolution\n model_coul = 'godby-needs'\n max_freq_coul = 120\n num_freq_coul = 35\n\n ! configuration of the Green solver\n thres_green = 1.0d-3\n max_iter_green = 300\n\n ! configuration for the correlation self energy\n ecut_corr = 5.0\n max_freq_corr = 100.0\n num_freq_corr = 11\n\n ! configuration for the exchange self energy\n ecut_exch = 20.0\n\n ! configuration for the output\n eta = 0.1\n min_freq_wind = -30.0\n max_freq_wind = 30.0\n num_freq_wind = 601\n/\n\n&gw_output\n/\n\nFREQUENCIES\n2\n 0.0 0.0\n 0.0 10.0\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_plasmon_pole.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! 
the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of W in the convolution\n max_freq_coul = 200\n num_freq_coul = 51\n\n ! configuration for the correlation self energy\n ecut_corr = 6.0\n\n ! configuration for the exchange self energy\n ecut_exch = 15.0\n/\n\n&gw_output\n/\n\nFREQUENCIES\n35\n 0.0 0.0\n 0.0 0.3\n 0.0 0.9\n 0.0 1.8\n 0.0 3.0\n 0.0 4.5\n 0.0 6.3\n 0.0 8.4\n 0.0 10.8\n 0.0 13.5\n 0.0 16.5\n 0.0 19.8\n 0.0 23.4\n 0.0 27.3\n 0.0 31.5\n 0.0 36.0\n 0.0 40.8\n 0.0 45.9\n 0.0 51.3\n 0.0 57.0\n 0.0 63.0\n 0.0 69.3\n 0.0 75.9\n 0.0 82.8\n 0.0 90.0\n 0.0 97.5\n 0.0 105.3\n 0.0 113.4\n 0.0 121.8\n 0.0 130.5\n 0.0 139.5\n 0.0 148.8\n 0.0 158.4\n 0.0 168.3\n 0.0 178.5\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_full_frequency.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&inputhp\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {%- for d in qgrid.dimensions %}\n nq{{ loop.index }} = {{ d }}\n {%- endfor %}\n/\n","name":"hp.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"hp.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"matdyn_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc ='force_constants.fc'\n flfrq ='frequencies.freq'\n flvec ='normal_modes.out'\n q_in_band_form = .true.\n /\n{{ipath.length}}\n{% for point in ipath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"matdyn_path.in","contextProviders":[{"name":"IPathFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"BEGIN\nBEGIN_PATH_INPUT\n&PATH\n restart_mode = 'from_scratch'\n string_method = 'neb',\n nstep_path = 50,\n ds = 2.D0,\n opt_scheme = \"broyden\",\n num_of_images = {{ 2 + (input.INTERMEDIATE_IMAGES.length || neb.nImages) }},\n k_max = 0.3D0,\n k_min = 0.2D0,\n CI_scheme = \"auto\",\n path_thr = 0.1D0,\n/\nEND_PATH_INPUT\nBEGIN_ENGINE_INPUT\n&CONTROL\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.03\n nspin = 2\n starting_magnetization = 0.5\n/\n&ELECTRONS\n conv_thr = 1.D-8\n mixing_beta = 0.3\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nBEGIN_POSITIONS\nFIRST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.FIRST_IMAGE }}\n{%- for IMAGE in input.INTERMEDIATE_IMAGES %}\nINTERMEDIATE_IMAGE\nATOMIC_POSITIONS crystal\n{{ IMAGE }}\n{%- endfor %}\nLAST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.LAST_IMAGE }}\nEND_POSITIONS\nCELL_PARAMETERS angstrom\n{{ 
input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nEND_ENGINE_INPUT\nEND\n","name":"neb.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"NEBFormDataManager"},{"name":"QENEBInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"neb.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n ldisp = .true.\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n{% for point in qpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor %}\n{% endfor %}\n","name":"ph_path.in","contextProviders":[{"name":"QPathFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n0 0 0\n","name":"ph_gamma.in","contextProviders":[],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .false.\n start_irr = 0\n last_irr = 0\n ldisp = .true.\n fildyn = 'dyn0'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_init_qpoints.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .true.\n ldisp = .true.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid_restart.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18\n ldisp = .true.\n {% raw -%}\n start_q = {{MAP_DATA.qpoint}}\n last_q = {{MAP_DATA.qpoint}}\n start_irr = {{MAP_DATA.irr}}\n last_irr= {{MAP_DATA.irr}}\n {%- endraw %}\n recover = .true.\n fildyn = 'dyn'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_SCRATCH_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_single_irr_qpt.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 0\n/\n&PLOT\n iflag = 3\n output_format = 5\n fileout ='density.xsf'\n/\n\n","name":"pp_density.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 11\n/\n","name":"pp_electrostatic_potential.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&PROJWFC\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n deltaE = 
0.05\n/\n","name":"projwfc.in","contextProviders":[],"applicationName":"espresso","executableName":"projwfc.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n input_dft = 'hse',\n {% for d in qgrid.dimensions -%}\n nqx{{loop.index}} = {{d}}\n {% endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal\n{{ '{{' }} {{ explicitKPath.length }} {% raw %} + KPOINTS|length }} {% endraw %}\n{%- raw %}\n{% for point in KPOINTS -%}\n {% for d in point.coordinates %}{{ \"%14.9f\"|format(d) }} {% endfor -%}{{ point.weight }}\n{% endfor %}\n{% endraw -%}\n{% for point in explicitKPath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}0.0000001\n{% endfor %}\n","name":"pw_scf_bands_hse.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPathFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ 
input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% raw %}{{PARAMETER | default('1')}} {{PARAMETER | default('1')}} 
{{PARAMETER | default('1')}} 0 0 0{% endraw %}\n","name":"pw_scf_kpt_conv.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'nscf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n{%- if subworkflowContext.NO_SYMMETRY_NO_INVERSION %}\n nosym = .true.\n noinv = .true.\n{%- endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_nscf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n nstep = 50\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'vc-relax'\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ 
input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_vc_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'bands'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal_b\n{{kpath.length}}\n{% for point in kpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"pw_bands.in","contextProviders":[{"name":"KPathFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n ecutfock = 100\n occupations = 'smearing'\n degauss = 0.005\n input_dft='hse',\n nqx1 = {% if kgrid.dimensions[0]%2 == 0 %}{{kgrid.dimensions[0]/2}}{% else %}{{(kgrid.dimensions[0]+1)/2}}{% endif %}, nqx2 = {% if kgrid.dimensions[1]%2 == 0 %}{{kgrid.dimensions[1]/2}}{% else %}{{(kgrid.dimensions[1]+1)/2}}{% endif %}, nqx3 = {% if kgrid.dimensions[2]%2 == 0 %}{{kgrid.dimensions[2]/2}}{% else %}{{(kgrid.dimensions[2]+1)/2}}{% endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{% if d%2 == 0 %}{{d}} {% else %}{{d+1}} {% endif %}{% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor 
%}\n","name":"pw_scf_hse.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n","name":"pw_scf_dft_u.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n{% for row in hubbard_v -%}\nV {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.atomicSpecies2 }}-{{ row.atomicOrbital2 }} {{ row.siteIndex }} {{ row.siteIndex2 }} {{ row.hubbardVValue }}\n{% endfor -%}\n","name":"pw_scf_dft_v.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"},{"name":"HubbardVContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if 
subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_j -%}\n{{ row.paramType }} {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.value }}\n{% endfor -%}\n","name":"pw_scf_dft_j.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardJContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n lda_plus_u = .true.\n lda_plus_u_kind = 0\n U_projection_type = 'ortho-atomic'\n {%- for row in hubbard_legacy %}\n Hubbard_U({{ row.atomicSpeciesIndex }}) = {{ row.hubbardUValue }}\n {%- endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf_dft_u_legacy.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardContextManagerLegacy"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&INPUT\n fildyn = 'dyn'\n zasr = 'simple'\n flfrc = 'force_constants.fc'\n/\n","name":"q2r.in","contextProviders":[],"applicationName":"espresso","executableName":"q2r.x"},{"content":"# ------------------------------------------------------------------------------- #\n# #\n# Example JupyterLab requirements #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. 
A Python virtual environment is created #\n# - in /scratch/$USER/$JOB_ID (for build: 'Default') #\n# - in /export/share/python/ (for build: 'with-pre-installed-packages') #\n# 3. This list is used to populate a Python virtual environment #\n# 4. JupyterLab is started #\n# #\n# For more information visit: #\n# - https://jupyterlab.readthedocs.io/en/stable/index.html #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Note: With the JupyterLab build 'with-pre-installed-packages', packages #\n# cannot be added during the notebook runtime. #\n# #\n# ------------------------------------------------------------------------------- #\n\njupyterlab==3.0.3\nnotebook>=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 
'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. 
#\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
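# A hedged, standalone illustration of the label encoding used above for
# classification targets (the class names are invented). LabelEncoder maps each
# class to an integer in [0, n_classes - 1]; the fitted encoder is what gets
# saved as "label_encoder" so predictions can later be mapped back to the
# original labels.
import sklearn.preprocessing

labels = ["metal", "insulator", "metal", "semiconductor"]
encoder = sklearn.preprocessing.LabelEncoder()
encoded = encoder.fit_transform(labels)       # -> [1, 0, 1, 2]
decoded = encoder.inverse_transform(encoded)  # back to the original strings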
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
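# A toy illustration of the split performed above, using invented data: with
# test_size=0.2, scikit-learn holds out 20% of the rows (here 1 of 5) and
# returns the arrays in the order descriptors-train, descriptors-test,
# target-train, target-test.
import numpy as np
import sklearn.model_selection

descriptors = np.arange(10).reshape(5, 2)
target = np.arange(5).reshape(-1, 1)
train_descriptors, test_descriptors, train_target, test_target = \
    sklearn.model_selection.train_test_split(descriptors, target, test_size=0.2)
print(test_descriptors.shape)  # (1, 2)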
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
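# A short sketch of the de-duplication pattern above, with toy data: the target
# is joined to the descriptors so a row is only dropped when both its
# descriptors and its target repeat.
import numpy as np
import pandas

target = np.array([[1.0], [2.0], [1.0]])
descriptors = np.array([[0.1, 0.2], [0.3, 0.4], [0.1, 0.2]])

df = pandas.DataFrame(target, columns=["target"])
df = df.join(pandas.DataFrame(descriptors))
df = df.drop_duplicates()                     # keeps the first of identical rows
target = df.pop("target").to_numpy().reshape(-1, 1)
descriptors = df.to_numpy()                   # one duplicate row removed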
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
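# A toy illustration of the `to_drop` switch above (values invented):
# dropna(axis="index") removes rows containing NaN, dropna(axis="columns")
# removes columns containing NaN, and "both" applies the two in sequence.
import numpy as np
import pandas

df = pandas.DataFrame({"a": [1.0, np.nan, 3.0], "b": [4.0, 5.0, 6.0]})
rows_dropped = df.dropna(axis="index")      # drops the row holding the NaN
cols_dropped = df.dropna(axis="columns")    # drops column "a" entirely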
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
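# A hedged check of the property the standardization unit relies on: after
# StandardScaler, each descriptor column has (approximately) zero mean and unit
# standard deviation. The data below is synthetic.
import numpy as np
import sklearn.preprocessing

rng = np.random.default_rng(0)
train = rng.normal(loc=5.0, scale=2.0, size=(100, 3))

scaler = sklearn.preprocessing.StandardScaler()
scaled = scaler.fit_transform(train)
print(scaled.mean(axis=0))  # ~0 for every column
print(scaled.std(axis=0))   # ~1 for every column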
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
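# The RMSE reported by these regression units is computed in the target's
# original units by inverting the target scaler first; below is a hedged,
# standalone restatement of that step with placeholder arrays. A compatibility
# note (hedged): newer scikit-learn releases renamed the DecisionTreeRegressor
# criterion "mse" to "squared_error" and the ensemble argument base_estimator
# to estimator, so the constructors above target the older API.
import numpy as np
import sklearn.metrics
import sklearn.preprocessing

target_scaler = sklearn.preprocessing.MinMaxScaler().fit([[0.0], [10.0]])
test_target_scaled = np.array([[0.2], [0.8]])
test_predictions_scaled = np.array([[0.25], [0.7]])

y_true = target_scaler.inverse_transform(test_target_scaled)
y_pred = target_scaler.inverse_transform(test_predictions_scaled)
rmse = np.sqrt(sklearn.metrics.mean_squared_error(y_true, y_pred))
print(f"RMSE = {rmse}")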
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
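# A hedged compatibility note for the unit above: newer scikit-learn releases
# renamed the "ls" loss of GradientBoostingRegressor to "squared_error", so a
# minimal modern equivalent of the constructor would look like this.
import sklearn.ensemble

model = sklearn.ensemble.GradientBoostingRegressor(
    loss="squared_error", learning_rate=0.1, n_estimators=100
)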
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
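# A minimal, hedged sketch of the XGBoost scikit-learn wrapper used above, with
# synthetic data; only the objective and the number of trees are set here, and
# the remaining parameters keep the library defaults.
import numpy as np
import xgboost

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 3))
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=20)

model = xgboost.XGBRegressor(objective="reg:squarederror", n_estimators=10)
model.fit(X, y)
predictions = model.predict(X)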
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
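# A toy, self-contained run of the clustering above (data invented): fit()
# assigns each training point to one of n_clusters groups, and predict() places
# new points into the nearest learned cluster centre.
import numpy as np
import sklearn.cluster

X = np.array([[0.0, 0.0], [0.1, 0.2], [5.0, 5.0], [5.1, 4.9]])
model = sklearn.cluster.KMeans(n_clusters=2, n_init=10, random_state=0)
model.fit(X)
print(model.labels_)                 # cluster index for each training point
print(model.predict([[0.05, 0.1]]))  # nearest cluster for a new point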
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
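# A hedged illustration of the behaviour that motivates the LASSO unit above:
# with an L1 penalty, some coefficients shrink exactly to zero, effectively
# selecting features. The data is synthetic. Note (hedged) that the `normalize`
# argument used above has been removed in recent scikit-learn releases, where
# scaling is expected to be done beforehand, as the scaler units in this
# workflow already do.
import numpy as np
import sklearn.linear_model

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 5))
y = 3.0 * X[:, 0] + 0.5 * X[:, 1]     # only two informative features

model = sklearn.linear_model.Lasso(alpha=0.1)
model.fit(X, y)
print(model.coef_)                    # most coefficients end up at (or near) zero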
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
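# A small sketch of the confusion matrix saved by the classification units
# (labels invented): rows correspond to the true classes and columns to the
# predicted classes, so the diagonal counts correct predictions.
import sklearn.metrics

y_true = [0, 0, 1, 1, 1]
y_pred = [0, 1, 1, 1, 0]
print(sklearn.metrics.confusion_matrix(y_true, y_pred))
# [[1 1]
#  [1 2]]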
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
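# The classification units above also save the predict_proba output; a hedged
# note on its shape with toy numbers: one column per class, ordered by encoded
# label, so for binary problems the downstream ROC unit takes column 1 as the
# positive-class probability.
import numpy as np

test_probabilities = np.array([[0.9, 0.1],
                               [0.2, 0.8]])
positive_class_probabilities = test_probabilities[:, 1]   # -> [0.1, 0.8]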
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(target_range)),\n max(max(predictions_range), max(predictions_range)))\n plt.xlim = (limits[0], limits[1])\n plt.ylim = (limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train an test set, #\n# and color by class. 
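# A hedged restatement of the axis-limit step in the parity plot above (values
# are placeholders): the limits presumably combine the target and prediction
# ranges, and plt.xlim / plt.ylim are functions that need to be called;
# assigning tuples to them, as written above, does not change the axis limits.
import matplotlib.pyplot as plt

target_range = (0.0, 1.0)
predictions_range = (-0.1, 1.2)
limits = (min(target_range[0], predictions_range[0]),
          max(target_range[1], predictions_range[1]))
plt.xlim(limits)
plt.ylim(limits)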
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR <=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. 
#\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. #\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. 
The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n 
ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. #\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(target_range)),\n max(max(predictions_range), max(predictions_range)))\n plt.xlim = (limits[0], limits[1])\n plt.ylim = (limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train an test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR < Date: Fri, 19 Jan 2024 00:11:32 +0800 Subject: [PATCH 14/20] SOF-7194: fix lmp executable name --- executables/deepmd/lmp.yml | 2 +- src/js/data/tree.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/executables/deepmd/lmp.yml b/executables/deepmd/lmp.yml index 260c3d20..c1f19948 100644 --- a/executables/deepmd/lmp.yml +++ b/executables/deepmd/lmp.yml @@ -10,4 +10,4 @@ flavors: monitors: - standard_output applicationName: deepmd - executableName: dp_lmp # mpirun -np $NP lmp -in in.lammps + executableName: lmp # mpirun -np $NP lmp -in in.lammps diff --git a/src/js/data/tree.js b/src/js/data/tree.js index 6d8f5eac..b7c18395 100644 --- a/src/js/data/tree.js +++ b/src/js/data/tree.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {applicationTree: 
{"deepmd":{"dp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.json"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp"}}},"lmp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"lammps":{"input":[{"name":"in.lammps"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp_lmp"}}},"python":{"isDefault":true,"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"isDefault":true,"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"},"qe_to_lmp_structure":{"isDefault":true,"input":[{"name":"qe_to_lmp_structure.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"cp.x":{"monitors":["standard_output"],"results":[],"flavors":{"cp":{"input":[{"name":"cp.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"},"cp_wf":{"input":[{"name":"cp_wf.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input":[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"matdyn.x":{"monitors":["standard_output"],"results":["phonon
_dos","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_init_qpoints":{"input":[{"name":"ph_init_qpoints.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reaction_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions","total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_scf_hse.in"}],"r
esults":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legacy":{"input":[{"name":"pw_scf_dft_u_legacy.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors":["standard_out
put"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationName":"jupyterLab","executableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"score"}}},"train":{"isDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test_split_sklearn.
py","templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:py
ml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sklearn":{"input":[{"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":
["standard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espresso_pw_scf.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","tem
plateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}}
+module.exports = {applicationTree: 
{"deepmd":{"dp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.json"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp"}}},"lmp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"lammps":{"input":[{"name":"in.lammps"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"lmp"}}},"python":{"isDefault":true,"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"isDefault":true,"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"},"qe_to_lmp_structure":{"isDefault":true,"input":[{"name":"qe_to_lmp_structure.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"cp.x":{"monitors":["standard_output"],"results":[],"flavors":{"cp":{"input":[{"name":"cp.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"},"cp_wf":{"input":[{"name":"cp_wf.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input":[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"matdyn.x":{"monitors":["standard_output"],"results":["phonon_do
s","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_init_qpoints":{"input":[{"name":"ph_init_qpoints.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reaction_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions","total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_scf_hse.in"}],"resu
lts":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legacy":{"input":[{"name":"pw_scf_dft_u_legacy.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors":["standard_output
"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationName":"jupyterLab","executableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"score"}}},"train":{"isDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test_split_sklearn.py"
,"templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_
predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sklearn":{"input":[{"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["s
tandard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espresso_pw_scf.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templa
teName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}} From a4273063e42442bd86e20ff4a18bcaf826ca3242 Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: 
Fri, 19 Jan 2024 00:37:33 +0800 Subject: [PATCH 15/20] SOF-7194: fix default flavor for deepmd/python executable --- executables/deepmd/python.yml | 2 +- src/js/data/tree.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/executables/deepmd/python.yml b/executables/deepmd/python.yml index e1b7670b..26f98568 100644 --- a/executables/deepmd/python.yml +++ b/executables/deepmd/python.yml @@ -14,7 +14,7 @@ flavors: executableName: python qe_to_lmp_structure: - isDefault: true + isDefault: false input: - name: qe_to_lmp_structure.py results: [] diff --git a/src/js/data/tree.js b/src/js/data/tree.js index b7c18395..b951ae66 100644 --- a/src/js/data/tree.js +++ b/src/js/data/tree.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {applicationTree: {"deepmd":{"dp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.json"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp"}}},"lmp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"lammps":{"input":[{"name":"in.lammps"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"lmp"}}},"python":{"isDefault":true,"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"isDefault":true,"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"},"qe_to_lmp_structure":{"isDefault":true,"input":[{"name":"qe_to_lmp_structure.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"cp.x":{"monitors":["standard_output"],"results":[],"flavors":{"cp":{"input":[{"name":"cp.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"},"cp_wf":{"input":[{"name":"cp_wf.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input"
:[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"matdyn.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_init_qpoints":{"input":[{"name":"ph_init_qpoints.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reactio
n_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions","total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_scf_hse.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legacy":{"input":[{"name":"pw_scf_dft_u_legacy.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic
"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors":["standard_output"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationName":"jupyterLab","executableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"score"}}},"train":{"isDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.
py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test_split_sklearn.py","templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":
"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sklearn":{"input":[{"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.t
xt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espresso_pw_scf.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{
"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["to
tal_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}} +module.exports = {applicationTree: {"deepmd":{"dp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.json"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp"}}},"lmp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"lammps":{"input":[{"name":"in.lammps"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"lmp"}}},"python":{"isDefault":true,"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"isDefault":true,"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"},"qe_to_lmp_structure":{"isDefault":false,"input":[{"name":"qe_to_lmp_structure.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"cp.x":{"monitors":["standard_output"],"results":[],"flavors":{"cp":{"input":[{"name":"cp.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"},"cp_wf":{"input":[{"name":"cp_wf.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input":[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitor
s":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"matdyn.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_init_qpoints":{"input":[{"name":"ph_init_qpoints.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reaction_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions"
,"total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_scf_hse.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legacy":{"input":[{"name":"pw_scf_dft_u_legacy.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","f
inal_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors":["standard_output"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationName":"jupyterLab","executableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"score"}}},"train":{"isDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"]
,"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test_split_sklearn.py","templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monito
rs":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sklearn":{"input":[{"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],
"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espresso_pw_scf.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSC
AR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stres
s_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}} From e0c89693d84150932f61ba5ab96e7260d86e028f Mon Sep 17 00:00:00 2001 From: Pranab Das <31024886+pranabdas@users.noreply.github.com> Date: Sat, 20 Jan 2024 09:50:38 +0800 Subject: [PATCH 16/20] SOF-7194: set orthogonalization method and ortho_eps in cp template --- assets/espresso/cp.j2.in | 2 ++ src/js/data/templates.js | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/assets/espresso/cp.j2.in b/assets/espresso/cp.j2.in index 0068d9be..d0e50787 100644 --- a/assets/espresso/cp.j2.in +++ b/assets/espresso/cp.j2.in @@ -27,6 +27,8 @@ electron_dynamics = 'verlet' electron_velocities = 'zero' emass = {{ dynamics.electronMass }} + orthogonalization = 'ortho' + ortho_eps = 1d-11 / &IONS ion_dynamics = 'verlet' diff --git a/src/js/data/templates.js b/src/js/data/templates.js index 92352a97..9a28d076 100644 --- a/src/js/data/templates.js +++ b/src/js/data/templates.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {allTemplates: [{"content":"import dpdata\n\n# load cp data files\n# https://docs.deepmodeling.com/projects/dpdata/en/master/formats/QECPTrajFormat.html\nsystem = dpdata.LabeledSystem(\"cp\", fmt=\"qe/cp/traj\")\n\n# convert dpdata to lammps format\n# below procedure will convert input QE structure to lammps format, user may\n# want to generate supercell or other complex structure for lammps calculation\nsystem.to_lmp(\"system.lmp\")\n","name":"qe_to_lmp_structure.py","contextProviders":[],"applicationName":"deepmd","executableName":"python"},{"content":"# LAMMPS input, deepmd-kit version. 
Built from bulk water calculation.\n\nunits metal\nboundary p p p\natom_style atomic\n\nneighbor 2.0 bin\nneigh_modify every 10 delay 0 check no\n\nread_data system.lmp\nmass 1 16\nmass 2 2\n\npair_style\t deepmd ./graph.pb\npair_coeff * *\n\nvelocity all create 330.0 23456789\n\nfix 1 all nvt temp 330.0 330.0 0.5\ntimestep 0.0005\nthermo_style custom step pe ke etotal temp press vol\nthermo 100\ndump 1 all custom 100 system.dump id type x y z\n\nrun 100\n","name":"in.lammps","contextProviders":[],"applicationName":"deepmd","executableName":"lmp"},{"content":"import dpdata\nimport numpy as np\n\n# https://docs.deepmodeling.com/projects/dpdata/en/master/formats/QECPTrajFormat.html\ndata = dpdata.LabeledSystem(\"cp\", fmt=\"qe/cp/traj\")\nprint(\"Dataset contains total {0} frames\".format(len(data)))\n\n# randomly choose 20% index for validation_data\nsize = len(data)\nsize_validation = round(size * 0.2)\n\nindex_validation = np.random.choice(size, size=size_validation, replace=False)\nindex_training = list(set(range(size)) - set(index_validation))\n\ndata_training = data.sub_system(index_training)\ndata_validation = data.sub_system(index_validation)\n\nprint(\"Using {0} frames as training set\".format(len(data_training)))\nprint(\"Using {0} frames as validation set\".format(len(data_validation)))\n\n# save training and validation sets\ndata_training.to_deepmd_npy(\"./training\")\ndata_validation.to_deepmd_npy(\"./validation\")\n","name":"qe_cp_to_deepmd.py","contextProviders":[],"applicationName":"deepmd","executableName":"python"},{"content":"{\n \"model\": {\n \"type_map\": [\n \"O\",\n \"H\"\n ],\n \"descriptor\": {\n \"type\": \"se_e2_r\",\n \"sel\": [\n 23,\n 46\n ],\n \"rcut_smth\": 0.50,\n \"rcut\": 5.00,\n \"neuron\": [\n 5,\n 5,\n 5\n ],\n \"resnet_dt\": false,\n \"seed\": 1\n },\n \"fitting_net\": {\n \"neuron\": [\n 60,\n 60,\n 60\n ],\n \"resnet_dt\": true,\n \"seed\": 1\n }\n },\n \"learning_rate\": {\n \"type\": \"exp\",\n \"decay_steps\": 1000,\n \"start_lr\": 0.005,\n \"stop_lr\": 3.51e-6\n },\n \"loss\": {\n \"start_pref_e\": 0.02,\n \"limit_pref_e\": 1,\n \"start_pref_f\": 1000,\n \"limit_pref_f\": 1,\n \"start_pref_v\": 0,\n \"limit_pref_v\": 0\n },\n \"training\": {\n \"training_data\": {\n \"systems\": [\n \"./training/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"validation_data\": {\n \"systems\": [\n \"./validation/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"numb_steps\": 301,\n \"seed\": 1,\n \"disp_file\": \"lcurve.out\",\n \"disp_freq\": 10,\n \"numb_test\": 1,\n \"save_freq\": 100\n }\n}\n","name":"dp_train_se_e2_r.json","contextProviders":[],"applicationName":"deepmd","executableName":"dp"},{"content":"1\npp.dat\n1.0\n3000\n3\n3.0000\n","name":"average.in","contextProviders":[],"applicationName":"espresso","executableName":"average.x"},{"content":"&BANDS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n filband = {% raw %}'{{ JOB_WORK_DIR }}/bands.dat'{% endraw %}\n no_overlap = .true.\n/\n\n","name":"bands.in","contextProviders":[],"applicationName":"espresso","executableName":"bands.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'cp'\n restart_mode = '{{ input.RESTART_MODE }}'\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}'{% endraw %}\n prefix = 'cp'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n nstep = {{ dynamics.numberOfSteps }}\n iprint = 1\n dt = 
{{ dynamics.timeStep }}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n nr1b = 20\n nr2b = 20\n nr3b = 20\n/\n&ELECTRONS\n electron_dynamics = 'verlet'\n electron_velocities = 'zero'\n emass = {{ dynamics.electronMass }}\n/\n&IONS\n ion_dynamics = 'verlet'\n ion_velocities = 'zero'\n tempw = {{ dynamics.temperature }}\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\n","name":"cp.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"IonDynamicsContextProvider"}],"applicationName":"espresso","executableName":"cp.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'cp-wf'\n restart_mode = '{{ input.RESTART_MODE }}'\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}'{% endraw %}\n prefix = 'cp_wf'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n nstep = {{ dynamics.numberOfSteps }}\n iprint = 1\n dt = {{ dynamics.timeStep }}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n! TODO: figure out the runtime error when `ecutrho` is set. \n! In the meantime, using Norm-conserving pseudopotentials works.\n! ecutrho = {{ cutoffs.density }}\n nr1b = 20\n nr2b = 20\n nr3b = 20\n/\n&ELECTRONS\n electron_dynamics = 'verlet'\n electron_velocities = 'zero'\n emass = {{ dynamics.electronMass }}\n/\n&IONS\n ion_dynamics = 'verlet'\n ion_velocities = 'zero'\n tempw = {{ dynamics.temperature }}\n/\n&CELL\n/\n&WANNIER\n nit = 60,\n calwf = 3,\n tolw = 1.D-6,\n nsteps = 50,\n adapt = .FALSE.\n wfdt = 2.0D0,\n wf_q = 500.D0,\n wf_friction = 0.3d0,\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\n","name":"cp_wf.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"IonDynamicsContextProvider"}],"applicationName":"espresso","executableName":"cp.x"},{"content":"&DOS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n/\n\n","name":"dos.in","contextProviders":[],"applicationName":"espresso","executableName":"dos.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"dynmat_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"dynmat.x"},{"content":"&inputpp\n calculation = \"eps\"\n prefix = \"__prefix__\"\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n\n&energy_grid\n smeartype = \"gauss\"\n intersmear = 0.2\n intrasmear = 0.0\n wmin = 0.0\n wmax = 30.0\n nw = 500\n shift = 0.0\n/\n\n","name":"epsilon.in","contextProviders":[],"applicationName":"espresso","executableName":"epsilon.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! 
the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! truncation (used for both correlation and exchange)\n truncation = '2d'\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of the Coulomb solver\n thres_coul = 1.0d-2\n\n ! configuration of W in the convolution\n model_coul = 'godby-needs'\n max_freq_coul = 120\n num_freq_coul = 35\n\n ! configuration of the Green solver\n thres_green = 1.0d-3\n max_iter_green = 300\n\n ! configuration for the correlation self energy\n ecut_corr = 5.0\n max_freq_corr = 100.0\n num_freq_corr = 11\n\n ! configuration for the exchange self energy\n ecut_exch = 20.0\n\n ! configuration for the output\n eta = 0.1\n min_freq_wind = -30.0\n max_freq_wind = 30.0\n num_freq_wind = 601\n/\n\n&gw_output\n/\n\nFREQUENCIES\n2\n 0.0 0.0\n 0.0 10.0\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_plasmon_pole.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of W in the convolution\n max_freq_coul = 200\n num_freq_coul = 51\n\n ! configuration for the correlation self energy\n ecut_corr = 6.0\n\n ! 
configuration for the exchange self energy\n ecut_exch = 15.0\n/\n\n&gw_output\n/\n\nFREQUENCIES\n35\n 0.0 0.0\n 0.0 0.3\n 0.0 0.9\n 0.0 1.8\n 0.0 3.0\n 0.0 4.5\n 0.0 6.3\n 0.0 8.4\n 0.0 10.8\n 0.0 13.5\n 0.0 16.5\n 0.0 19.8\n 0.0 23.4\n 0.0 27.3\n 0.0 31.5\n 0.0 36.0\n 0.0 40.8\n 0.0 45.9\n 0.0 51.3\n 0.0 57.0\n 0.0 63.0\n 0.0 69.3\n 0.0 75.9\n 0.0 82.8\n 0.0 90.0\n 0.0 97.5\n 0.0 105.3\n 0.0 113.4\n 0.0 121.8\n 0.0 130.5\n 0.0 139.5\n 0.0 148.8\n 0.0 158.4\n 0.0 168.3\n 0.0 178.5\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_full_frequency.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&inputhp\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {%- for d in qgrid.dimensions %}\n nq{{ loop.index }} = {{ d }}\n {%- endfor %}\n/\n","name":"hp.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"hp.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"matdyn_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc ='force_constants.fc'\n flfrq ='frequencies.freq'\n flvec ='normal_modes.out'\n q_in_band_form = .true.\n /\n{{ipath.length}}\n{% for point in ipath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"matdyn_path.in","contextProviders":[{"name":"IPathFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"BEGIN\nBEGIN_PATH_INPUT\n&PATH\n restart_mode = 'from_scratch'\n string_method = 'neb',\n nstep_path = 50,\n ds = 2.D0,\n opt_scheme = \"broyden\",\n num_of_images = {{ 2 + (input.INTERMEDIATE_IMAGES.length || neb.nImages) }},\n k_max = 0.3D0,\n k_min = 0.2D0,\n CI_scheme = \"auto\",\n path_thr = 0.1D0,\n/\nEND_PATH_INPUT\nBEGIN_ENGINE_INPUT\n&CONTROL\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.03\n nspin = 2\n starting_magnetization = 0.5\n/\n&ELECTRONS\n conv_thr = 1.D-8\n mixing_beta = 0.3\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nBEGIN_POSITIONS\nFIRST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.FIRST_IMAGE }}\n{%- for IMAGE in input.INTERMEDIATE_IMAGES %}\nINTERMEDIATE_IMAGE\nATOMIC_POSITIONS crystal\n{{ IMAGE }}\n{%- endfor %}\nLAST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.LAST_IMAGE }}\nEND_POSITIONS\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor 
%}\nEND_ENGINE_INPUT\nEND\n","name":"neb.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"NEBFormDataManager"},{"name":"QENEBInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"neb.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n ldisp = .true.\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n{% for point in qpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor %}\n{% endfor %}\n","name":"ph_path.in","contextProviders":[{"name":"QPathFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n0 0 0\n","name":"ph_gamma.in","contextProviders":[],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .false.\n start_irr = 0\n last_irr = 0\n ldisp = .true.\n fildyn = 'dyn0'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_init_qpoints.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .true.\n ldisp = .true.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid_restart.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18\n ldisp = .true.\n {% raw -%}\n start_q = {{MAP_DATA.qpoint}}\n last_q = {{MAP_DATA.qpoint}}\n start_irr = {{MAP_DATA.irr}}\n last_irr= {{MAP_DATA.irr}}\n {%- endraw %}\n recover = .true.\n fildyn = 'dyn'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_SCRATCH_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_single_irr_qpt.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 0\n/\n&PLOT\n iflag = 3\n output_format = 5\n fileout ='density.xsf'\n/\n\n","name":"pp_density.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 11\n/\n","name":"pp_electrostatic_potential.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&PROJWFC\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n deltaE = 0.05\n/\n","name":"projwfc.in","contextProviders":[],"applicationName":"espresso","executableName":"projwfc.x"},{"content":"{% if 
subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n input_dft = 'hse',\n {% for d in qgrid.dimensions -%}\n nqx{{loop.index}} = {{d}}\n {% endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal\n{{ '{{' }} {{ explicitKPath.length }} {% raw %} + KPOINTS|length }} {% endraw %}\n{%- raw %}\n{% for point in KPOINTS -%}\n {% for d in point.coordinates %}{{ \"%14.9f\"|format(d) }} {% endfor -%}{{ point.weight }}\n{% endfor %}\n{% endraw -%}\n{% for point in explicitKPath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}0.0000001\n{% endfor %}\n","name":"pw_scf_bands_hse.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPathFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n 
assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% raw %}{{PARAMETER | default('1')}} {{PARAMETER | default('1')}} {{PARAMETER | default('1')}} 0 0 0{% endraw 
%}\n","name":"pw_scf_kpt_conv.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'nscf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n{%- if subworkflowContext.NO_SYMMETRY_NO_INVERSION %}\n nosym = .true.\n noinv = .true.\n{%- endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_nscf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n nstep = 50\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'vc-relax'\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ 
input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_vc_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'bands'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal_b\n{{kpath.length}}\n{% for point in kpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"pw_bands.in","contextProviders":[{"name":"KPathFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n ecutfock = 100\n occupations = 'smearing'\n degauss = 0.005\n input_dft='hse',\n nqx1 = {% if kgrid.dimensions[0]%2 == 0 %}{{kgrid.dimensions[0]/2}}{% else %}{{(kgrid.dimensions[0]+1)/2}}{% endif %}, nqx2 = {% if kgrid.dimensions[1]%2 == 0 %}{{kgrid.dimensions[1]/2}}{% else %}{{(kgrid.dimensions[1]+1)/2}}{% endif %}, nqx3 = {% if kgrid.dimensions[2]%2 == 0 %}{{kgrid.dimensions[2]/2}}{% else %}{{(kgrid.dimensions[2]+1)/2}}{% endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{% if d%2 == 0 %}{{d}} {% else %}{{d+1}} {% endif %}{% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor 
%}\n","name":"pw_scf_hse.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n","name":"pw_scf_dft_u.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n{% for row in hubbard_v -%}\nV {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.atomicSpecies2 }}-{{ row.atomicOrbital2 }} {{ row.siteIndex }} {{ row.siteIndex2 }} {{ row.hubbardVValue }}\n{% endfor -%}\n","name":"pw_scf_dft_v.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"},{"name":"HubbardVContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if 
subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_j -%}\n{{ row.paramType }} {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.value }}\n{% endfor -%}\n","name":"pw_scf_dft_j.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardJContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n lda_plus_u = .true.\n lda_plus_u_kind = 0\n U_projection_type = 'ortho-atomic'\n {%- for row in hubbard_legacy %}\n Hubbard_U({{ row.atomicSpeciesIndex }}) = {{ row.hubbardUValue }}\n {%- endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf_dft_u_legacy.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardContextManagerLegacy"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&INPUT\n fildyn = 'dyn'\n zasr = 'simple'\n flfrc = 'force_constants.fc'\n/\n","name":"q2r.in","contextProviders":[],"applicationName":"espresso","executableName":"q2r.x"},{"content":"# ------------------------------------------------------------------------------- #\n# #\n# Example JupyterLab requirements #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. 
A Python virtual environment is created #\n# - in /scratch/$USER/$JOB_ID (for build: 'Default') #\n# - in /export/share/python/ (for build: 'with-pre-installed-packages') #\n# 3. This list is used to populate a Python virtual environment #\n# 4. JupyterLab is started #\n# #\n# For more information visit: #\n# - https://jupyterlab.readthedocs.io/en/stable/index.html #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Note: With the JupyterLab build 'with-pre-installed-packages', packages #\n# cannot be added during the notebook runtime. #\n# #\n# ------------------------------------------------------------------------------- #\n\njupyterlab==3.0.3\nnotebook>=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 
'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. 
#\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(target_range)),\n max(max(predictions_range), max(predictions_range)))\n plt.xlim = (limits[0], limits[1])\n plt.ylim = (limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train an test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR <=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. 
#\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. #\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. 
The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n 
ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. #\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(target_range)),\n max(max(predictions_range), max(predictions_range)))\n plt.xlim = (limits[0], limits[1])\n plt.ylim = (limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train an test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR < Date: Sat, 20 Jan 2024 09:52:36 +0800 Subject: [PATCH 17/20] SOF-7194: set a default flavor for cp.x executable --- executables/espresso/cp.x.yml | 1 + src/js/data/tree.js | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/executables/espresso/cp.x.yml b/executables/espresso/cp.x.yml index a9a2017c..c600efc8 100644 --- a/executables/espresso/cp.x.yml +++ b/executables/espresso/cp.x.yml @@ -3,6 +3,7 @@ monitors: results: [] flavors: cp: + isDefault: true input: - name: cp.in results: [] diff --git a/src/js/data/tree.js b/src/js/data/tree.js index b951ae66..08ebb7a8 100644 --- a/src/js/data/tree.js +++ b/src/js/data/tree.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {applicationTree: 
{"deepmd":{"dp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.json"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp"}}},"lmp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"lammps":{"input":[{"name":"in.lammps"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"lmp"}}},"python":{"isDefault":true,"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"isDefault":true,"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"},"qe_to_lmp_structure":{"isDefault":false,"input":[{"name":"qe_to_lmp_structure.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"cp.x":{"monitors":["standard_output"],"results":[],"flavors":{"cp":{"input":[{"name":"cp.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"},"cp_wf":{"input":[{"name":"cp_wf.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input":[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"matdyn.x":{"monitors":["standard_output"],"results":["phonon_d
os","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_init_qpoints":{"input":[{"name":"ph_init_qpoints.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reaction_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions","total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_scf_hse.in"}],"res
ults":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legacy":{"input":[{"name":"pw_scf_dft_u_legacy.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors":["standard_outpu
t"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationName":"jupyterLab","executableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"score"}}},"train":{"isDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test_split_sklearn.py
","templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml
_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sklearn":{"input":[{"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["
standard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espresso_pw_scf.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templ
ateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}} +module.exports = {applicationTree: 
{"deepmd":{"dp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"dp_train_se_e2_r":{"input":[{"name":"dp_train_se_e2_r.json"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"dp"}}},"lmp":{"isDefault":false,"monitors":["standard_output"],"results":[],"flavors":{"lammps":{"input":[{"name":"in.lammps"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"lmp"}}},"python":{"isDefault":true,"monitors":["standard_output"],"results":[],"flavors":{"qe_cp_to_deepmd":{"isDefault":true,"input":[{"name":"qe_cp_to_deepmd.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"},"qe_to_lmp_structure":{"isDefault":false,"input":[{"name":"qe_to_lmp_structure.py"}],"results":[],"monitors":["standard_output"],"applicationName":"deepmd","executableName":"python"}}}},"espresso":{"average.x":{"monitors":["standard_output"],"results":["average_potential_profile"],"flavors":{"average":{"input":[{"name":"average.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"},"average_potential":{"input":[{"name":"average.in"}],"results":["average_potential_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"average.x"}}},"bands.x":{"monitors":["standard_output"],"results":["band_structure"],"flavors":{"bands":{"input":[{"name":"bands.in"}],"results":["band_structure"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"bands.x"}}},"cp.x":{"monitors":["standard_output"],"results":[],"flavors":{"cp":{"isDefault":true,"input":[{"name":"cp.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"},"cp_wf":{"input":[{"name":"cp_wf.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"cp.x"}}},"dos.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"dos":{"input":[{"name":"dos.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dos.x"}}},"dynmat.x":{"monitors":["standard_output"],"results":[],"flavors":{"dynmat":{"input":[{"name":"dynmat_grid.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"dynmat.x"}}},"epsilon.x":{"monitors":["standard_output"],"results":["dielectric_tensor"],"flavors":{"dielectric_tensor":{"input":[{"name":"epsilon.in"}],"results":["dielectric_tensor"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"epsilon.x"}}},"gw.x":{"monitors":["standard_output"],"results":["band_structure","fermi_energy","band_gaps"],"flavors":{"gw_bands_plasmon_pole":{"input":[{"name":"gw_bands_plasmon_pole.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"},"gw_bands_full_frequency":{"input":[{"name":"gw_bands_full_frequency.in"}],"results":["band_structure","fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"gw.x"}}},"hp.x":{"monitors":["standard_output"],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"flavors":{"hp":{"input":[{"name":"hp.in"}],"results":["hubbard_u","hubbard_v_nn","hubbard_v"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"hp.x"}},"supportedApplicationVersions":["7.0","7.2"]},"matdyn.x":{"monitors":["standard_output"],"re
sults":["phonon_dos","phonon_dispersions"],"flavors":{"matdyn_grid":{"input":[{"name":"matdyn_grid.in"}],"monitors":["standard_output"],"results":["phonon_dos"],"applicationName":"espresso","executableName":"matdyn.x"},"matdyn_path":{"input":[{"name":"matdyn_path.in"}],"monitors":["standard_output"],"results":["phonon_dispersions"],"applicationName":"espresso","executableName":"matdyn.x"}}},"neb.x":{"monitors":["standard_output"],"results":["reaction_energy_barrier","reaction_energy_profile"],"flavors":{"neb":{"isMultiMaterial":true,"input":[{"name":"neb.in"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"neb.x"}}},"ph.x":{"monitors":["standard_output"],"results":["phonon_dos","phonon_dispersions","zero_point_energy"],"flavors":{"ph_path":{"input":[{"name":"ph_path.in"}],"results":["phonon_dispersions"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid":{"input":[{"name":"ph_grid.in"}],"results":["phonon_dos"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_gamma":{"input":[{"name":"ph_gamma.in"}],"results":["zero_point_energy"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_init_qpoints":{"input":[{"name":"ph_init_qpoints.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_grid_restart":{"input":[{"name":"ph_grid_restart.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"},"ph_single_irr_qpt":{"input":[{"name":"ph_single_irr_qpt.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"ph.x"}}},"pp.x":{"monitors":["standard_output"],"results":[],"flavors":{"pp_density":{"input":[{"name":"pp_density.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"},"pp_electrostatic_potential":{"input":[{"name":"pp_electrostatic_potential.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pp.x"}}},"projwfc.x":{"monitors":["standard_output"],"results":["density_of_states"],"flavors":{"projwfc":{"input":[{"name":"projwfc.in"}],"results":["density_of_states"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"projwfc.x"}}},"pw.x":{"isDefault":true,"hasAdvancedComputeOptions":true,"postProcessors":["remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","charge_density_profile","density_of_states","fermi_energy","final_structure","magnetic_moments","potential_profile","pressure","reaction_energy_barrier","reaction_energy_profile","stress_tensor","total_energy","total_energy_contributions","total_force"],"flavors":{"pw_scf":{"isDefault":true,"input":[{"name":"pw_scf.in"}],"results":["atomic_forces","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_bands_hse":{"input":[{"name":"pw_scf_bands_hse.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_hse":{"input":[{"name":"pw_s
cf_hse.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_dft_u":{"input":[{"name":"pw_scf_dft_u.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+v":{"input":[{"name":"pw_scf_dft_v.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u+j":{"input":[{"name":"pw_scf_dft_j.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["7.2"]},"pw_scf_dft_u_legacy":{"input":[{"name":"pw_scf_dft_u_legacy.in"}],"results":["atomic_forces","band_gaps","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x","supportedApplicationVersions":["5.2.1","5.4.0","6.0.0","6.3","6.4.1","6.5.0","6.6.0","6.7.0","6.8.0","7.0"]},"pw_esm":{"input":[{"name":"pw_esm.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_esm_relax":{"input":[{"name":"pw_esm_relax.in"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor","potential_profile","charge_density_profile"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"},"pw_nscf":{"input":[{"name":"pw_nscf.in"}],"results":["fermi_energy","band_gaps"],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_bands":{"input":[{"name":"pw_bands.in"}],"monitors":["standard_output"],"applicationName":"espresso","executableName":"pw.x"},"pw_relax":{"input":[{"name":"pw_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_vc-relax":{"input":[{"name":"pw_vc_relax.in"}],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor","final_structure"],"applicationName":"espresso","executableName":"pw.x"},"pw_scf_kpt_conv":{"input":[{"name":"pw_scf_kpt_conv.in"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"espresso","executableName":"pw.x"}}},"q2r.x":{"monitors"
:["standard_output"],"results":[],"flavors":{"q2r":{"input":[{"name":"q2r.in"}],"results":[],"monitors":["standard_output"],"applicationName":"espresso","executableName":"q2r.x"}}}},"jupyterLab":{"jupyter":{"isDefault":true,"monitors":["standard_output","jupyter_notebook_endpoint"],"results":[],"flavors":{"notebook":{"isDefault":true,"input":[{"name":"requirements.txt","templateName":"requirements.txt"}],"monitors":["standard_output","jupyter_notebook_endpoint"],"applicationName":"jupyterLab","executableName":"jupyter"}}}},"exabyteml":{"score":{"isDefault":false,"monitors":["standard_output"],"results":["predicted_properties"],"flavors":{"score":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"score"}}},"train":{"isDefault":true,"monitors":["standard_output"],"results":["workflow:ml_predict"],"flavors":{"train":{"isDefault":true,"input":[],"monitors":["standard_output"],"applicationName":"exabyteml","executableName":"train"}}}},"nwchem":{"nwchem":{"isDefault":true,"hasAdvancedComputeOptions":false,"postProcessors":["error_handler"],"monitors":["standard_output"],"results":["total_energy","total_energy_contributions"],"flavors":{"nwchem_total_energy":{"isDefault":true,"input":[{"name":"nwchem_total_energy.inp"}],"results":["total_energy","total_energy_contributions"],"monitors":["standard_output"],"applicationName":"nwchem","executableName":"nwchem"}}}},"python":{"python":{"isDefault":true,"monitors":["standard_output"],"results":["file_content","workflow:pyml_predict"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"script.py","templateName":"hello_world.py"},{"name":"requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_xml_get_qpt_irr":{"input":[{"name":"espresso_xml_get_qpt_irr.py"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"espresso_extract_kpoints":{"input":[{"name":"espresso_extract_kpoints.py"},{"name":"requirements.txt","templateName":"requirements_empty.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:post_processing:plot:matplotlib":{"input":[{"name":"plot.py","templateName":"matplotlib_basic.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"generic:processing:find_extrema:scipy":{"input":[{"name":"find_extrema.py","templateName":"find_extrema.py"},{"name":"requirements.txt","templateName":"processing_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:setup_variables_packages":{"input":[{"name":"settings.py","templateName":"pyml_settings.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:custom":{"input":[{"name":"pyml_custom.py","templateName":"pyml_custom.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:read_csv:pandas":{"input":[{"name":"data_input_read_csv_pandas.py","templateName":"data_input_read_csv_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:data_input:train_test_split:sklearn":{"input":[{"name":"data_input_train_test
_split_sklearn.py","templateName":"data_input_train_test_split_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:min_max_scaler:sklearn":{"input":[{"name":"pre_processing_min_max_sklearn.py","templateName":"pre_processing_min_max_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_duplicates:pandas":{"input":[{"name":"pre_processing_remove_duplicates_pandas.py","templateName":"pre_processing_remove_duplicates_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:remove_missing:pandas":{"input":[{"name":"pre_processing_remove_missing_pandas.py","templateName":"pre_processing_remove_missing_pandas.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:pre_processing:standardization:sklearn":{"input":[{"name":"pre_processing_standardization_sklearn.py","templateName":"pre_processing_standardization_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:adaboosted_trees_regression:sklearn":{"input":[{"name":"model_adaboosted_trees_regression_sklearn.py","templateName":"model_adaboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"monitors":["standard_output"],"results":["workflow:pyml_predict"],"applicationName":"python","executableName":"python"},"pyml:model:bagged_trees_regression:sklearn":{"input":[{"name":"model_bagged_trees_regression_sklearn.py","templateName":"model_bagged_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_regression:sklearn":{"input":[{"name":"model_gradboosted_trees_regression_sklearn.py","templateName":"model_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_regression:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_regression_sklearn.py","templateName":"model_extreme_gradboosted_trees_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:k_means_clustering:sklearn":{"input":[{"name":"model_k_means_clustering_sklearn.py","templateName":"model_k_means_clustering_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:kernel_ridge_regression:sklearn":{"input":[{"name":"model_kernel_ridge_regression_sklearn.py","templateName":"model_kernel_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results
":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:lasso_regression:sklearn":{"input":[{"name":"model_lasso_regression_sklearn.py","templateName":"model_lasso_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:multilayer_perceptron:sklearn":{"input":[{"name":"model_mlp_sklearn.py","templateName":"model_mlp_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_classification:sklearn":{"input":[{"name":"model_random_forest_classification_sklearn.py","templateName":"model_random_forest_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:gradboosted_trees_classification:sklearn":{"input":[{"name":"model_gradboosted_trees_classification_sklearn.py","templateName":"model_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:extreme_gradboosted_trees_classification:sklearn":{"input":[{"name":"model_extreme_gradboosted_trees_classification_sklearn.py","templateName":"model_extreme_gradboosted_trees_classification_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:random_forest_regression:sklearn":{"input":[{"name":"model_random_forest_regression_sklearn.py","templateName":"model_random_forest_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:model:ridge_regression:sklearn":{"input":[{"name":"model_ridge_regression_sklearn.py","templateName":"model_ridge_regression_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["workflow:pyml_predict"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:parity_plot:matplotlib":{"input":[{"name":"post_processing_parity_plot_matplotlib.py","templateName":"post_processing_parity_plot_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:pca_2d_clusters:matplotlib":{"input":[{"name":"post_processing_pca_2d_clusters_matplotlib.py","templateName":"post_processing_pca_2d_clusters_matplotlib.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_content"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"},"pyml:post_processing:roc_curve:sklearn":{"input":[{"name":"post_processing_roc_curve_sklearn.py","templateName":"post_processing_roc_curve_sklearn.py"},{"name":"requirements.txt","templateName":"pyml_requirements.txt"}],"results":["file_conten
t"],"monitors":["standard_output"],"applicationName":"python","executableName":"python"}}}},"shell":{"sh":{"isDefault":true,"monitors":["standard_output"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","phonon_dispersions","phonon_dos","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"hello_world":{"isDefault":true,"input":[{"name":"hello_world.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"job_espresso_pw_scf":{"input":[{"name":"job_espresso_pw_scf.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_link_outdir_save":{"input":[{"name":"espresso_link_outdir_save.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"espresso_collect_dynmat":{"input":[{"name":"espresso_collect_dynmat.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"},"bash_vasp_prepare_neb_images":{"isMultiMaterial":true,"input":[{"name":"bash_vasp_prepare_neb_images.sh"}],"monitors":["standard_output"],"applicationName":"shell","executableName":"sh"}}}},"vasp":{"vasp":{"isDefault":true,"postProcessors":["error_handler","prepare_restart","remove_non_zero_weight_kpoints"],"monitors":["standard_output","convergence_ionic","convergence_electronic"],"results":["atomic_forces","band_gaps","band_structure","density_of_states","fermi_energy","pressure","stress_tensor","total_energy","total_energy_contributions","total_force","zero_point_energy","final_structure","magnetic_moments","reaction_energy_barrier","reaction_energy_profile","potential_profile","charge_density_profile"],"flavors":{"vasp":{"isDefault":true,"input":[{"name":"INCAR"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf":{"input":[{"name":"INCAR","templateName":"INCAR_BANDS"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_HSE"},{"name":"KPOINTS"},{"name":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_bands_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name":"KPOINTS","templateName":"KPOINTS_BANDS"},{"name":"POSCAR","templateName":""}],"results":["band_structure"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_nscf_hse":{"isDefault":false,"input":[{"name":"INCAR","templateName":"INCAR_BANDS_HSE"},{"name"
:"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["band_gaps","fermi_energy"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_relax":{"input":[{"name":"INCAR","templateName":"INCAR_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","atomic_forces","fermi_energy","pressure","stress_tensor","total_force","final_structure"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"postProcessors":["prepare_restart"],"applicationName":"vasp","executableName":"vasp"},"vasp_zpe":{"input":[{"name":"INCAR","templateName":"INCAR_ZPE"},{"name":"KPOINTS","templateName":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","fermi_energy","pressure","atomic_forces","stress_tensor","total_force","zero_point_energy"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_kpt_conv":{"input":[{"name":"INCAR","templateName":"INCAR"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_vc_relax_conv":{"input":[{"name":"INCAR","templateName":"INCAR_VC_RELAX"},{"name":"KPOINTS","templateName":"KPOINTS_CONV"},{"name":"POSCAR","templateName":"POSCAR"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic","convergence_ionic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB"},{"name":"KPOINTS","templateName":"KPOINTS"}],"results":["reaction_energy_barrier","reaction_energy_profile"],"monitors":["standard_output"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_initial":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_INITIAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"},"vasp_neb_final":{"isMultiMaterial":true,"input":[{"name":"INCAR","templateName":"INCAR_NEB_INITIAL_FINAL"},{"name":"KPOINTS"},{"name":"POSCAR","templateName":"POSCAR_NEB_FINAL"}],"results":["total_energy","total_energy_contributions","pressure","fermi_energy","atomic_forces","total_force","stress_tensor"],"monitors":["standard_output","convergence_electronic"],"applicationName":"vasp","executableName":"vasp"}}}}}} From 1234dde5dc90276f3672fa3c0349d0ccaa81270d Mon Sep 17 00:00:00 2001 From: Timur Bazhirov Date: Mon, 22 Jan 2024 
17:23:53 -0800
Subject: [PATCH 18/20] Update cp.j2.in

---
 assets/espresso/cp.j2.in | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/assets/espresso/cp.j2.in b/assets/espresso/cp.j2.in
index d0e50787..9cb1b5f5 100644
--- a/assets/espresso/cp.j2.in
+++ b/assets/espresso/cp.j2.in
@@ -27,8 +27,9 @@
     electron_dynamics = 'verlet'
     electron_velocities = 'zero'
     emass = {{ dynamics.electronMass }}
-    orthogonalization = 'ortho'
-    ortho_eps = 1d-11
+!! consider the below parameters if orthogonalization fails
+!   orthogonalization = 'ortho'
+!   ortho_eps = 1d-11
 /
 &IONS
     ion_dynamics = 'verlet'

From c76be31b81f4112f14b8f7460fa251324cec8b6f Mon Sep 17 00:00:00 2001
From: Pranab Das <31024886+pranabdas@users.noreply.github.com>
Date: Tue, 23 Jan 2024 19:53:11 +0800
Subject: [PATCH 19/20] chore (SOF-7194): some renamings

---
 ...{qe_cp_to_deepmd.j2.py => espresso_cp_to_deepmd.j2.py} | 0
 ...structure.j2.py => espresso_to_lammps_structure.j2.py} | 0
 executables/deepmd/lmp.yml                                 | 2 +-
 executables/deepmd/python.yml                              | 8 ++++----
 src/js/data/templates.js                                   | 2 +-
 src/js/data/tree.js                                        | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)
 rename assets/deepmd/{qe_cp_to_deepmd.j2.py => espresso_cp_to_deepmd.j2.py} (100%)
 rename assets/deepmd/{qe_to_lmp_structure.j2.py => espresso_to_lammps_structure.j2.py} (100%)

diff --git a/assets/deepmd/qe_cp_to_deepmd.j2.py b/assets/deepmd/espresso_cp_to_deepmd.j2.py
similarity index 100%
rename from assets/deepmd/qe_cp_to_deepmd.j2.py
rename to assets/deepmd/espresso_cp_to_deepmd.j2.py
diff --git a/assets/deepmd/qe_to_lmp_structure.j2.py b/assets/deepmd/espresso_to_lammps_structure.j2.py
similarity index 100%
rename from assets/deepmd/qe_to_lmp_structure.j2.py
rename to assets/deepmd/espresso_to_lammps_structure.j2.py
diff --git a/executables/deepmd/lmp.yml b/executables/deepmd/lmp.yml
index c1f19948..cd6c5eab 100644
--- a/executables/deepmd/lmp.yml
+++ b/executables/deepmd/lmp.yml
@@ -3,7 +3,7 @@ monitors:
   - standard_output
 results: []
 flavors:
-  lammps:
+  lammps_md:
     input:
       - name: in.lammps
     results: []
diff --git a/executables/deepmd/python.yml b/executables/deepmd/python.yml
index 26f98568..5f0aef0b 100644
--- a/executables/deepmd/python.yml
+++ b/executables/deepmd/python.yml
@@ -3,20 +3,20 @@ monitors:
   - standard_output
 results: []
 flavors:
-  qe_cp_to_deepmd:
+  espresso_cp_to_deepmd:
     isDefault: true
     input:
-      - name: qe_cp_to_deepmd.py
+      - name: espresso_cp_to_deepmd.py
     results: []
     monitors:
       - standard_output
     applicationName: deepmd
     executableName: python

-  qe_to_lmp_structure:
+  espresso_to_lammps_structure:
     isDefault: false
     input:
-      - name: qe_to_lmp_structure.py
+      - name: espresso_to_lammps_structure.py
     results: []
     monitors:
       - standard_output
diff --git a/src/js/data/templates.js b/src/js/data/templates.js
index 9a28d076..42151745 100644
--- a/src/js/data/templates.js
+++ b/src/js/data/templates.js
@@ -1,2 +1,2 @@
 /* eslint-disable */
-module.exports = {allTemplates: [{"content":"import dpdata\n\n# load cp data files\n# https://docs.deepmodeling.com/projects/dpdata/en/master/formats/QECPTrajFormat.html\nsystem = dpdata.LabeledSystem(\"cp\", fmt=\"qe/cp/traj\")\n\n# convert dpdata to lammps format\n# below procedure will convert input QE structure to lammps format, user may\n# want to generate supercell or other complex structure for lammps calculation\nsystem.to_lmp(\"system.lmp\")\n","name":"qe_to_lmp_structure.py","contextProviders":[],"applicationName":"deepmd","executableName":"python"},{"content":"# LAMMPS input, deepmd-kit version. 
Built from bulk water calculation.\n\nunits metal\nboundary p p p\natom_style atomic\n\nneighbor 2.0 bin\nneigh_modify every 10 delay 0 check no\n\nread_data system.lmp\nmass 1 16\nmass 2 2\n\npair_style\t deepmd ./graph.pb\npair_coeff * *\n\nvelocity all create 330.0 23456789\n\nfix 1 all nvt temp 330.0 330.0 0.5\ntimestep 0.0005\nthermo_style custom step pe ke etotal temp press vol\nthermo 100\ndump 1 all custom 100 system.dump id type x y z\n\nrun 100\n","name":"in.lammps","contextProviders":[],"applicationName":"deepmd","executableName":"lmp"},{"content":"import dpdata\nimport numpy as np\n\n# https://docs.deepmodeling.com/projects/dpdata/en/master/formats/QECPTrajFormat.html\ndata = dpdata.LabeledSystem(\"cp\", fmt=\"qe/cp/traj\")\nprint(\"Dataset contains total {0} frames\".format(len(data)))\n\n# randomly choose 20% index for validation_data\nsize = len(data)\nsize_validation = round(size * 0.2)\n\nindex_validation = np.random.choice(size, size=size_validation, replace=False)\nindex_training = list(set(range(size)) - set(index_validation))\n\ndata_training = data.sub_system(index_training)\ndata_validation = data.sub_system(index_validation)\n\nprint(\"Using {0} frames as training set\".format(len(data_training)))\nprint(\"Using {0} frames as validation set\".format(len(data_validation)))\n\n# save training and validation sets\ndata_training.to_deepmd_npy(\"./training\")\ndata_validation.to_deepmd_npy(\"./validation\")\n","name":"qe_cp_to_deepmd.py","contextProviders":[],"applicationName":"deepmd","executableName":"python"},{"content":"{\n \"model\": {\n \"type_map\": [\n \"O\",\n \"H\"\n ],\n \"descriptor\": {\n \"type\": \"se_e2_r\",\n \"sel\": [\n 23,\n 46\n ],\n \"rcut_smth\": 0.50,\n \"rcut\": 5.00,\n \"neuron\": [\n 5,\n 5,\n 5\n ],\n \"resnet_dt\": false,\n \"seed\": 1\n },\n \"fitting_net\": {\n \"neuron\": [\n 60,\n 60,\n 60\n ],\n \"resnet_dt\": true,\n \"seed\": 1\n }\n },\n \"learning_rate\": {\n \"type\": \"exp\",\n \"decay_steps\": 1000,\n \"start_lr\": 0.005,\n \"stop_lr\": 3.51e-6\n },\n \"loss\": {\n \"start_pref_e\": 0.02,\n \"limit_pref_e\": 1,\n \"start_pref_f\": 1000,\n \"limit_pref_f\": 1,\n \"start_pref_v\": 0,\n \"limit_pref_v\": 0\n },\n \"training\": {\n \"training_data\": {\n \"systems\": [\n \"./training/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"validation_data\": {\n \"systems\": [\n \"./validation/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"numb_steps\": 301,\n \"seed\": 1,\n \"disp_file\": \"lcurve.out\",\n \"disp_freq\": 10,\n \"numb_test\": 1,\n \"save_freq\": 100\n }\n}\n","name":"dp_train_se_e2_r.json","contextProviders":[],"applicationName":"deepmd","executableName":"dp"},{"content":"1\npp.dat\n1.0\n3000\n3\n3.0000\n","name":"average.in","contextProviders":[],"applicationName":"espresso","executableName":"average.x"},{"content":"&BANDS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n filband = {% raw %}'{{ JOB_WORK_DIR }}/bands.dat'{% endraw %}\n no_overlap = .true.\n/\n\n","name":"bands.in","contextProviders":[],"applicationName":"espresso","executableName":"bands.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'cp'\n restart_mode = '{{ input.RESTART_MODE }}'\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}'{% endraw %}\n prefix = 'cp'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n nstep = {{ dynamics.numberOfSteps }}\n iprint = 1\n dt = 
{{ dynamics.timeStep }}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n nr1b = 20\n nr2b = 20\n nr3b = 20\n/\n&ELECTRONS\n electron_dynamics = 'verlet'\n electron_velocities = 'zero'\n emass = {{ dynamics.electronMass }}\n orthogonalization = 'ortho'\n ortho_eps = 1d-11\n/\n&IONS\n ion_dynamics = 'verlet'\n ion_velocities = 'zero'\n tempw = {{ dynamics.temperature }}\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\n","name":"cp.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"IonDynamicsContextProvider"}],"applicationName":"espresso","executableName":"cp.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'cp-wf'\n restart_mode = '{{ input.RESTART_MODE }}'\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}'{% endraw %}\n prefix = 'cp_wf'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n nstep = {{ dynamics.numberOfSteps }}\n iprint = 1\n dt = {{ dynamics.timeStep }}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n! TODO: figure out the runtime error when `ecutrho` is set. \n! In the meantime, using Norm-conserving pseudopotentials works.\n! ecutrho = {{ cutoffs.density }}\n nr1b = 20\n nr2b = 20\n nr3b = 20\n/\n&ELECTRONS\n electron_dynamics = 'verlet'\n electron_velocities = 'zero'\n emass = {{ dynamics.electronMass }}\n/\n&IONS\n ion_dynamics = 'verlet'\n ion_velocities = 'zero'\n tempw = {{ dynamics.temperature }}\n/\n&CELL\n/\n&WANNIER\n nit = 60,\n calwf = 3,\n tolw = 1.D-6,\n nsteps = 50,\n adapt = .FALSE.\n wfdt = 2.0D0,\n wf_q = 500.D0,\n wf_friction = 0.3d0,\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\n","name":"cp_wf.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"IonDynamicsContextProvider"}],"applicationName":"espresso","executableName":"cp.x"},{"content":"&DOS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n/\n\n","name":"dos.in","contextProviders":[],"applicationName":"espresso","executableName":"dos.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"dynmat_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"dynmat.x"},{"content":"&inputpp\n calculation = \"eps\"\n prefix = \"__prefix__\"\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n\n&energy_grid\n smeartype = \"gauss\"\n intersmear = 0.2\n intrasmear = 0.0\n wmin = 0.0\n wmax = 30.0\n nw = 500\n shift = 0.0\n/\n\n","name":"epsilon.in","contextProviders":[],"applicationName":"espresso","executableName":"epsilon.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! 
config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! truncation (used for both correlation and exchange)\n truncation = '2d'\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of the Coulomb solver\n thres_coul = 1.0d-2\n\n ! configuration of W in the convolution\n model_coul = 'godby-needs'\n max_freq_coul = 120\n num_freq_coul = 35\n\n ! configuration of the Green solver\n thres_green = 1.0d-3\n max_iter_green = 300\n\n ! configuration for the correlation self energy\n ecut_corr = 5.0\n max_freq_corr = 100.0\n num_freq_corr = 11\n\n ! configuration for the exchange self energy\n ecut_exch = 20.0\n\n ! configuration for the output\n eta = 0.1\n min_freq_wind = -30.0\n max_freq_wind = 30.0\n num_freq_wind = 601\n/\n\n&gw_output\n/\n\nFREQUENCIES\n2\n 0.0 0.0\n 0.0 10.0\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_plasmon_pole.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of W in the convolution\n max_freq_coul = 200\n num_freq_coul = 51\n\n ! configuration for the correlation self energy\n ecut_corr = 6.0\n\n ! 
configuration for the exchange self energy\n ecut_exch = 15.0\n/\n\n&gw_output\n/\n\nFREQUENCIES\n35\n 0.0 0.0\n 0.0 0.3\n 0.0 0.9\n 0.0 1.8\n 0.0 3.0\n 0.0 4.5\n 0.0 6.3\n 0.0 8.4\n 0.0 10.8\n 0.0 13.5\n 0.0 16.5\n 0.0 19.8\n 0.0 23.4\n 0.0 27.3\n 0.0 31.5\n 0.0 36.0\n 0.0 40.8\n 0.0 45.9\n 0.0 51.3\n 0.0 57.0\n 0.0 63.0\n 0.0 69.3\n 0.0 75.9\n 0.0 82.8\n 0.0 90.0\n 0.0 97.5\n 0.0 105.3\n 0.0 113.4\n 0.0 121.8\n 0.0 130.5\n 0.0 139.5\n 0.0 148.8\n 0.0 158.4\n 0.0 168.3\n 0.0 178.5\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_full_frequency.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&inputhp\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {%- for d in qgrid.dimensions %}\n nq{{ loop.index }} = {{ d }}\n {%- endfor %}\n/\n","name":"hp.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"hp.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"matdyn_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc ='force_constants.fc'\n flfrq ='frequencies.freq'\n flvec ='normal_modes.out'\n q_in_band_form = .true.\n /\n{{ipath.length}}\n{% for point in ipath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"matdyn_path.in","contextProviders":[{"name":"IPathFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"BEGIN\nBEGIN_PATH_INPUT\n&PATH\n restart_mode = 'from_scratch'\n string_method = 'neb',\n nstep_path = 50,\n ds = 2.D0,\n opt_scheme = \"broyden\",\n num_of_images = {{ 2 + (input.INTERMEDIATE_IMAGES.length || neb.nImages) }},\n k_max = 0.3D0,\n k_min = 0.2D0,\n CI_scheme = \"auto\",\n path_thr = 0.1D0,\n/\nEND_PATH_INPUT\nBEGIN_ENGINE_INPUT\n&CONTROL\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.03\n nspin = 2\n starting_magnetization = 0.5\n/\n&ELECTRONS\n conv_thr = 1.D-8\n mixing_beta = 0.3\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nBEGIN_POSITIONS\nFIRST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.FIRST_IMAGE }}\n{%- for IMAGE in input.INTERMEDIATE_IMAGES %}\nINTERMEDIATE_IMAGE\nATOMIC_POSITIONS crystal\n{{ IMAGE }}\n{%- endfor %}\nLAST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.LAST_IMAGE }}\nEND_POSITIONS\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor 
%}\nEND_ENGINE_INPUT\nEND\n","name":"neb.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"NEBFormDataManager"},{"name":"QENEBInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"neb.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n ldisp = .true.\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n{% for point in qpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor %}\n{% endfor %}\n","name":"ph_path.in","contextProviders":[{"name":"QPathFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n0 0 0\n","name":"ph_gamma.in","contextProviders":[],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .false.\n start_irr = 0\n last_irr = 0\n ldisp = .true.\n fildyn = 'dyn0'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_init_qpoints.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .true.\n ldisp = .true.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid_restart.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18\n ldisp = .true.\n {% raw -%}\n start_q = {{MAP_DATA.qpoint}}\n last_q = {{MAP_DATA.qpoint}}\n start_irr = {{MAP_DATA.irr}}\n last_irr= {{MAP_DATA.irr}}\n {%- endraw %}\n recover = .true.\n fildyn = 'dyn'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_SCRATCH_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_single_irr_qpt.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 0\n/\n&PLOT\n iflag = 3\n output_format = 5\n fileout ='density.xsf'\n/\n\n","name":"pp_density.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 11\n/\n","name":"pp_electrostatic_potential.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&PROJWFC\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n deltaE = 0.05\n/\n","name":"projwfc.in","contextProviders":[],"applicationName":"espresso","executableName":"projwfc.x"},{"content":"{% if 
subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n input_dft = 'hse',\n {% for d in qgrid.dimensions -%}\n nqx{{loop.index}} = {{d}}\n {% endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal\n{{ '{{' }} {{ explicitKPath.length }} {% raw %} + KPOINTS|length }} {% endraw %}\n{%- raw %}\n{% for point in KPOINTS -%}\n {% for d in point.coordinates %}{{ \"%14.9f\"|format(d) }} {% endfor -%}{{ point.weight }}\n{% endfor %}\n{% endraw -%}\n{% for point in explicitKPath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}0.0000001\n{% endfor %}\n","name":"pw_scf_bands_hse.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPathFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n 
assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% raw %}{{PARAMETER | default('1')}} {{PARAMETER | default('1')}} {{PARAMETER | default('1')}} 0 0 0{% endraw 
%}\n","name":"pw_scf_kpt_conv.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'nscf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n{%- if subworkflowContext.NO_SYMMETRY_NO_INVERSION %}\n nosym = .true.\n noinv = .true.\n{%- endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_nscf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n nstep = 50\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'vc-relax'\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ 
input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_vc_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'bands'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal_b\n{{kpath.length}}\n{% for point in kpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"pw_bands.in","contextProviders":[{"name":"KPathFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n ecutfock = 100\n occupations = 'smearing'\n degauss = 0.005\n input_dft='hse',\n nqx1 = {% if kgrid.dimensions[0]%2 == 0 %}{{kgrid.dimensions[0]/2}}{% else %}{{(kgrid.dimensions[0]+1)/2}}{% endif %}, nqx2 = {% if kgrid.dimensions[1]%2 == 0 %}{{kgrid.dimensions[1]/2}}{% else %}{{(kgrid.dimensions[1]+1)/2}}{% endif %}, nqx3 = {% if kgrid.dimensions[2]%2 == 0 %}{{kgrid.dimensions[2]/2}}{% else %}{{(kgrid.dimensions[2]+1)/2}}{% endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{% if d%2 == 0 %}{{d}} {% else %}{{d+1}} {% endif %}{% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor 
%}\n","name":"pw_scf_hse.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n","name":"pw_scf_dft_u.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n{% for row in hubbard_v -%}\nV {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.atomicSpecies2 }}-{{ row.atomicOrbital2 }} {{ row.siteIndex }} {{ row.siteIndex2 }} {{ row.hubbardVValue }}\n{% endfor -%}\n","name":"pw_scf_dft_v.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"},{"name":"HubbardVContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if 
subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_j -%}\n{{ row.paramType }} {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.value }}\n{% endfor -%}\n","name":"pw_scf_dft_j.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardJContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n lda_plus_u = .true.\n lda_plus_u_kind = 0\n U_projection_type = 'ortho-atomic'\n {%- for row in hubbard_legacy %}\n Hubbard_U({{ row.atomicSpeciesIndex }}) = {{ row.hubbardUValue }}\n {%- endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf_dft_u_legacy.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardContextManagerLegacy"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&INPUT\n fildyn = 'dyn'\n zasr = 'simple'\n flfrc = 'force_constants.fc'\n/\n","name":"q2r.in","contextProviders":[],"applicationName":"espresso","executableName":"q2r.x"},{"content":"# ------------------------------------------------------------------------------- #\n# #\n# Example JupyterLab requirements #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. 
A Python virtual environment is created #\n# - in /scratch/$USER/$JOB_ID (for build: 'Default') #\n# - in /export/share/python/ (for build: 'with-pre-installed-packages') #\n# 3. This list is used to populate a Python virtual environment #\n# 4. JupyterLab is started #\n# #\n# For more information visit: #\n# - https://jupyterlab.readthedocs.io/en/stable/index.html #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Note: With the JupyterLab build 'with-pre-installed-packages', packages #\n# cannot be added during the notebook runtime. #\n# #\n# ------------------------------------------------------------------------------- #\n\njupyterlab==3.0.3\nnotebook>=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 
'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. 
#\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(predictions_range)),\n max(max(target_range), max(predictions_range)))\n plt.xlim(limits[0], limits[1])\n plt.ylim(limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train and test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR <=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. 
#\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. #\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. 
The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n 
ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. #\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do \`context.load(\"name-of-the-saved-object\")\`\n# To save an object, simply do \`context.save(object_here, \"name-for-the-object\")\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_predictions = context.load(\"train_predictions\")\n test_target = context.load(\"test_target\")\n test_predictions = context.load(\"test_predictions\")\n\n # Un-transform the data\n target_scaler = context.load(\"target_scaler\")\n train_target = target_scaler.inverse_transform(train_target)\n train_predictions = target_scaler.inverse_transform(train_predictions)\n test_target = target_scaler.inverse_transform(test_target)\n test_predictions = target_scaler.inverse_transform(test_predictions)\n\n # Plot the data\n plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n if settings.is_using_train_test_split:\n plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n plt.xlabel(\"Actual Value\")\n plt.ylabel(\"Predicted Value\")\n\n # Scale the plot\n target_range = (min(min(train_target), min(test_target)),\n max(max(train_target), max(test_target)))\n predictions_range = (min(min(train_predictions), min(test_predictions)),\n max(max(train_predictions), max(test_predictions)))\n\n limits = (min(min(target_range), min(target_range)),\n max(max(predictions_range), max(predictions_range)))\n plt.xlim = (limits[0], limits[1])\n plt.ylim = (limits[0], limits[1])\n\n # Draw a parity line, as a guide to the eye\n plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n plt.legend()\n\n # Save the figure\n plt.tight_layout()\n plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into are the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train an test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR < Date: Tue, 23 Jan 2024 22:28:34 +0800 Subject: [PATCH 20/20] SOF-7194: fix template file name --- src/js/data/templates.js | 2 +- templates/deepmd/dp_create_lmp_structure.py.yml | 4 ++-- templates/deepmd/dp_prepare.py.yml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/js/data/templates.js b/src/js/data/templates.js index 42151745..ae735d40 100644 --- a/src/js/data/templates.js +++ b/src/js/data/templates.js @@ -1,2 +1,2 @@ /* eslint-disable */ -module.exports = {allTemplates: [{"content":"assets/deepmd/qe_to_lmp_structure.j2.py","name":"qe_to_lmp_structure.py","contextProviders":[],"applicationName":"deepmd","executableName":"python"},{"content":"# LAMMPS input, deepmd-kit version. 
Built from bulk water calculation.\n\nunits metal\nboundary p p p\natom_style atomic\n\nneighbor 2.0 bin\nneigh_modify every 10 delay 0 check no\n\nread_data system.lmp\nmass 1 16\nmass 2 2\n\npair_style\t deepmd ./graph.pb\npair_coeff * *\n\nvelocity all create 330.0 23456789\n\nfix 1 all nvt temp 330.0 330.0 0.5\ntimestep 0.0005\nthermo_style custom step pe ke etotal temp press vol\nthermo 100\ndump 1 all custom 100 system.dump id type x y z\n\nrun 100\n","name":"in.lammps","contextProviders":[],"applicationName":"deepmd","executableName":"lmp"},{"content":"assets/deepmd/qe_cp_to_deepmd.j2.py","name":"qe_cp_to_deepmd.py","contextProviders":[],"applicationName":"deepmd","executableName":"python"},{"content":"{\n \"model\": {\n \"type_map\": [\n \"O\",\n \"H\"\n ],\n \"descriptor\": {\n \"type\": \"se_e2_r\",\n \"sel\": [\n 23,\n 46\n ],\n \"rcut_smth\": 0.50,\n \"rcut\": 5.00,\n \"neuron\": [\n 5,\n 5,\n 5\n ],\n \"resnet_dt\": false,\n \"seed\": 1\n },\n \"fitting_net\": {\n \"neuron\": [\n 60,\n 60,\n 60\n ],\n \"resnet_dt\": true,\n \"seed\": 1\n }\n },\n \"learning_rate\": {\n \"type\": \"exp\",\n \"decay_steps\": 1000,\n \"start_lr\": 0.005,\n \"stop_lr\": 3.51e-6\n },\n \"loss\": {\n \"start_pref_e\": 0.02,\n \"limit_pref_e\": 1,\n \"start_pref_f\": 1000,\n \"limit_pref_f\": 1,\n \"start_pref_v\": 0,\n \"limit_pref_v\": 0\n },\n \"training\": {\n \"training_data\": {\n \"systems\": [\n \"./training/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"validation_data\": {\n \"systems\": [\n \"./validation/\"\n ],\n \"batch_size\": \"auto\"\n },\n \"numb_steps\": 301,\n \"seed\": 1,\n \"disp_file\": \"lcurve.out\",\n \"disp_freq\": 10,\n \"numb_test\": 1,\n \"save_freq\": 100\n }\n}\n","name":"dp_train_se_e2_r.json","contextProviders":[],"applicationName":"deepmd","executableName":"dp"},{"content":"1\npp.dat\n1.0\n3000\n3\n3.0000\n","name":"average.in","contextProviders":[],"applicationName":"espresso","executableName":"average.x"},{"content":"&BANDS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n filband = {% raw %}'{{ JOB_WORK_DIR }}/bands.dat'{% endraw %}\n no_overlap = .true.\n/\n\n","name":"bands.in","contextProviders":[],"applicationName":"espresso","executableName":"bands.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'cp'\n restart_mode = '{{ input.RESTART_MODE }}'\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}'{% endraw %}\n prefix = 'cp'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n nstep = {{ dynamics.numberOfSteps }}\n iprint = 1\n dt = {{ dynamics.timeStep }}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n nr1b = 20\n nr2b = 20\n nr3b = 20\n/\n&ELECTRONS\n electron_dynamics = 'verlet'\n electron_velocities = 'zero'\n emass = {{ dynamics.electronMass }}\n!! consider the below parameters if orthogonalization fails\n! orthogonalization = 'ortho'\n! 
ortho_eps = 1d-11\n/\n&IONS\n ion_dynamics = 'verlet'\n ion_velocities = 'zero'\n tempw = {{ dynamics.temperature }}\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\n","name":"cp.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"IonDynamicsContextProvider"}],"applicationName":"espresso","executableName":"cp.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'cp-wf'\n restart_mode = '{{ input.RESTART_MODE }}'\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}'{% endraw %}\n prefix = 'cp_wf'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n nstep = {{ dynamics.numberOfSteps }}\n iprint = 1\n dt = {{ dynamics.timeStep }}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n! TODO: figure out the runtime error when `ecutrho` is set. \n! In the meantime, using Norm-conserving pseudopotentials works.\n! ecutrho = {{ cutoffs.density }}\n nr1b = 20\n nr2b = 20\n nr3b = 20\n/\n&ELECTRONS\n electron_dynamics = 'verlet'\n electron_velocities = 'zero'\n emass = {{ dynamics.electronMass }}\n/\n&IONS\n ion_dynamics = 'verlet'\n ion_velocities = 'zero'\n tempw = {{ dynamics.temperature }}\n/\n&CELL\n/\n&WANNIER\n nit = 60,\n calwf = 3,\n tolw = 1.D-6,\n nsteps = 50,\n adapt = .FALSE.\n wfdt = 2.0D0,\n wf_q = 500.D0,\n wf_friction = 0.3d0,\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\n","name":"cp_wf.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"IonDynamicsContextProvider"}],"applicationName":"espresso","executableName":"cp.x"},{"content":"&DOS\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n/\n\n","name":"dos.in","contextProviders":[],"applicationName":"espresso","executableName":"dos.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"dynmat_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"dynmat.x"},{"content":"&inputpp\n calculation = \"eps\"\n prefix = \"__prefix__\"\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n\n&energy_grid\n smeartype = \"gauss\"\n intersmear = 0.2\n intrasmear = 0.0\n wmin = 0.0\n wmax = 30.0\n nw = 500\n shift = 0.0\n/\n\n","name":"epsilon.in","contextProviders":[],"applicationName":"espresso","executableName":"epsilon.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! truncation (used for both correlation and exchange)\n truncation = '2d'\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of the Coulomb solver\n thres_coul = 1.0d-2\n\n ! 
configuration of W in the convolution\n model_coul = 'godby-needs'\n max_freq_coul = 120\n num_freq_coul = 35\n\n ! configuration of the Green solver\n thres_green = 1.0d-3\n max_iter_green = 300\n\n ! configuration for the correlation self energy\n ecut_corr = 5.0\n max_freq_corr = 100.0\n num_freq_corr = 11\n\n ! configuration for the exchange self energy\n ecut_exch = 20.0\n\n ! configuration for the output\n eta = 0.1\n min_freq_wind = -30.0\n max_freq_wind = 30.0\n num_freq_wind = 601\n/\n\n&gw_output\n/\n\nFREQUENCIES\n2\n 0.0 0.0\n 0.0 10.0\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_plasmon_pole.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&gw_input\n\n ! see http://www.sternheimergw.org for more information.\n\n ! config of the scf run\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n\n ! the grid used for the linear response\n kpt_grid = {{ kgrid.dimensions|join(', ') }}\n qpt_grid = {{ qgrid.dimensions|join(', ') }}\n\n ! number of bands for which the GW correction is calculated\n num_band = 8\n\n ! configuration of W in the convolution\n max_freq_coul = 200\n num_freq_coul = 51\n\n ! configuration for the correlation self energy\n ecut_corr = 6.0\n\n ! configuration for the exchange self energy\n ecut_exch = 15.0\n/\n\n&gw_output\n/\n\nFREQUENCIES\n35\n 0.0 0.0\n 0.0 0.3\n 0.0 0.9\n 0.0 1.8\n 0.0 3.0\n 0.0 4.5\n 0.0 6.3\n 0.0 8.4\n 0.0 10.8\n 0.0 13.5\n 0.0 16.5\n 0.0 19.8\n 0.0 23.4\n 0.0 27.3\n 0.0 31.5\n 0.0 36.0\n 0.0 40.8\n 0.0 45.9\n 0.0 51.3\n 0.0 57.0\n 0.0 63.0\n 0.0 69.3\n 0.0 75.9\n 0.0 82.8\n 0.0 90.0\n 0.0 97.5\n 0.0 105.3\n 0.0 113.4\n 0.0 121.8\n 0.0 130.5\n 0.0 139.5\n 0.0 148.8\n 0.0 158.4\n 0.0 168.3\n 0.0 178.5\n/\n\nK_points\n{{ explicitKPath2PIBA.length }}\n{% for point in explicitKPath2PIBA -%}\n{% for coordinate in point.coordinates %}{{ coordinate }}{% endfor %}\n{% endfor %}\n/\n\n","name":"gw_bands_full_frequency.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPath2PIBAFormDataManager"}],"applicationName":"espresso","executableName":"gw.x"},{"content":"&inputhp\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {%- for d in qgrid.dimensions %}\n nq{{ loop.index }} = {{ d }}\n {%- endfor %}\n/\n","name":"hp.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"hp.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc = 'force_constants.fc'\n flfrq = 'frequencies.freq'\n dos = .true.\n fldos = 'phonon_dos.out'\n deltaE = 1.d0\n {% for d in igrid.dimensions -%}\n nk{{loop.index}} = {{d}}\n {% endfor %}\n /\n","name":"matdyn_grid.in","contextProviders":[{"name":"IGridFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"&INPUT\n asr = 'simple'\n flfrc ='force_constants.fc'\n flfrq ='frequencies.freq'\n flvec ='normal_modes.out'\n q_in_band_form = .true.\n /\n{{ipath.length}}\n{% for point in ipath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor 
%}\n","name":"matdyn_path.in","contextProviders":[{"name":"IPathFormDataManager"}],"applicationName":"espresso","executableName":"matdyn.x"},{"content":"BEGIN\nBEGIN_PATH_INPUT\n&PATH\n restart_mode = 'from_scratch'\n string_method = 'neb',\n nstep_path = 50,\n ds = 2.D0,\n opt_scheme = \"broyden\",\n num_of_images = {{ 2 + (input.INTERMEDIATE_IMAGES.length || neb.nImages) }},\n k_max = 0.3D0,\n k_min = 0.2D0,\n CI_scheme = \"auto\",\n path_thr = 0.1D0,\n/\nEND_PATH_INPUT\nBEGIN_ENGINE_INPUT\n&CONTROL\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.03\n nspin = 2\n starting_magnetization = 0.5\n/\n&ELECTRONS\n conv_thr = 1.D-8\n mixing_beta = 0.3\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nBEGIN_POSITIONS\nFIRST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.FIRST_IMAGE }}\n{%- for IMAGE in input.INTERMEDIATE_IMAGES %}\nINTERMEDIATE_IMAGE\nATOMIC_POSITIONS crystal\n{{ IMAGE }}\n{%- endfor %}\nLAST_IMAGE\nATOMIC_POSITIONS crystal\n{{ input.LAST_IMAGE }}\nEND_POSITIONS\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nEND_ENGINE_INPUT\nEND\n","name":"neb.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"NEBFormDataManager"},{"name":"QENEBInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"neb.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n ldisp = .true.\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_grid.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n{% for point in qpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor %}\n{% endfor %}\n","name":"ph_path.in","contextProviders":[{"name":"QPathFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-12\n asr = .true.\n search_sym = .false.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n/\n0 0 0\n","name":"ph_gamma.in","contextProviders":[],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .false.\n start_irr = 0\n last_irr = 0\n ldisp = .true.\n fildyn = 'dyn0'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_init_qpoints.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18,\n recover = .true.\n ldisp = .true.\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n fildyn = 'dyn'\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor 
%}\n/\n","name":"ph_grid_restart.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPH\n tr2_ph = 1.0d-18\n ldisp = .true.\n {% raw -%}\n start_q = {{MAP_DATA.qpoint}}\n last_q = {{MAP_DATA.qpoint}}\n start_irr = {{MAP_DATA.irr}}\n last_irr= {{MAP_DATA.irr}}\n {%- endraw %}\n recover = .true.\n fildyn = 'dyn'\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_SCRATCH_DIR }}/outdir'{% endraw %}\n {% for d in qgrid.dimensions -%}\n nq{{loop.index}} = {{d}}\n {% endfor %}\n/\n","name":"ph_single_irr_qpt.in","contextProviders":[{"name":"QGridFormDataManager"}],"applicationName":"espresso","executableName":"ph.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 0\n/\n&PLOT\n iflag = 3\n output_format = 5\n fileout ='density.xsf'\n/\n\n","name":"pp_density.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&INPUTPP\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n filplot = 'pp.dat'\n plot_num = 11\n/\n","name":"pp_electrostatic_potential.in","contextProviders":[],"applicationName":"espresso","executableName":"pp.x"},{"content":"&PROJWFC\n prefix = '__prefix__'\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n degauss = 0.01\n deltaE = 0.05\n/\n","name":"projwfc.in","contextProviders":[],"applicationName":"espresso","executableName":"projwfc.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n input_dft = 'hse',\n {% for d in qgrid.dimensions -%}\n 
nqx{{loop.index}} = {{d}}\n {% endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal\n{{ '{{' }} {{ explicitKPath.length }} {% raw %} + KPOINTS|length }} {% endraw %}\n{%- raw %}\n{% for point in KPOINTS -%}\n {% for d in point.coordinates %}{{ \"%14.9f\"|format(d) }} {% endfor -%}{{ point.weight }}\n{% endfor %}\n{% endraw -%}\n{% for point in explicitKPath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}0.0000001\n{% endfor %}\n","name":"pw_scf_bands_hse.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"QGridFormDataManager"},{"name":"ExplicitKPathFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n assume_isolated = 'esm'\n esm_bc = '{{ boundaryConditions.type }}'\n fcp_mu = {{ boundaryConditions.targetFermiEnergy }}\n esm_w = {{ boundaryConditions.offset }}\n esm_efield = {{ boundaryConditions.electricField }}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS 
angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_esm_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"BoundaryConditionsFormDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% raw %}{{PARAMETER | default('1')}} {{PARAMETER | default('1')}} {{PARAMETER | default('1')}} 0 0 0{% endraw %}\n","name":"pw_scf_kpt_conv.in","contextProviders":[{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'nscf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n{%- if subworkflowContext.NO_SYMMETRY_NO_INVERSION %}\n nosym = .true.\n noinv = .true.\n{%- endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_nscf.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'relax'\n nstep = 50\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 
0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&CONTROL\n calculation = 'vc-relax'\n title = ''\n verbosity = 'low'\n restart_mode = 'from_scratch'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_vc_relax.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'bands'\n title = ''\n verbosity = 'low'\n restart_mode = '{{input.RESTART_MODE}}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'smearing'\n degauss = 0.005\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS crystal_b\n{{kpath.length}}\n{% for point in kpath -%}\n{% for d in point.coordinates %}{{d}} {% endfor -%}{{point.steps}}\n{% endfor %}\n","name":"pw_bands.in","contextProviders":[{"name":"KPathFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = 
.true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n ecutfock = 100\n occupations = 'smearing'\n degauss = 0.005\n input_dft='hse',\n nqx1 = {% if kgrid.dimensions[0]%2 == 0 %}{{kgrid.dimensions[0]/2}}{% else %}{{(kgrid.dimensions[0]+1)/2}}{% endif %}, nqx2 = {% if kgrid.dimensions[1]%2 == 0 %}{{kgrid.dimensions[1]/2}}{% else %}{{(kgrid.dimensions[1]+1)/2}}{% endif %}, nqx3 = {% if kgrid.dimensions[2]%2 == 0 %}{{kgrid.dimensions[2]/2}}{% else %}{{(kgrid.dimensions[2]+1)/2}}{% endif %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{% if d%2 == 0 %}{{d}} {% else %}{{d+1}} {% endif %}{% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf_hse.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n","name":"pw_scf_dft_u.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw 
%}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_u -%}\nU {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.hubbardUValue }}\n{% endfor -%}\n{% for row in hubbard_v -%}\nV {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.atomicSpecies2 }}-{{ row.atomicOrbital2 }} {{ row.siteIndex }} {{ row.siteIndex2 }} {{ row.hubbardVValue }}\n{% endfor -%}\n","name":"pw_scf_dft_v.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardUContextManager"},{"name":"HubbardVContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\nHUBBARD {ortho-atomic}\n{% for row in hubbard_j -%}\n{{ row.paramType }} {{ row.atomicSpecies }}-{{ row.atomicOrbital }} {{ row.value }}\n{% endfor -%}\n","name":"pw_scf_dft_j.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardJContextManager"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"{% if subworkflowContext.MATERIAL_INDEX %}\n{%- set input = input.perMaterial[subworkflowContext.MATERIAL_INDEX] -%}\n{% endif -%}\n&CONTROL\n calculation = 'scf'\n title = ''\n verbosity = 'low'\n restart_mode = '{{ input.RESTART_MODE }}'\n wf_collect = .true.\n tstress = .true.\n tprnfor = .true.\n outdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n wfcdir = {% raw %}'{{ JOB_WORK_DIR }}/outdir'{% endraw %}\n prefix = '__prefix__'\n pseudo_dir = {% raw %}'{{ JOB_WORK_DIR }}/pseudo'{% endraw %}\n/\n&SYSTEM\n ibrav = {{ input.IBRAV }}\n nat = {{ input.NAT }}\n ntyp = {{ input.NTYP }}\n ecutwfc = {{ cutoffs.wavefunction }}\n ecutrho = {{ cutoffs.density }}\n occupations = 'fixed'\n lda_plus_u = .true.\n lda_plus_u_kind = 0\n 
U_projection_type = 'ortho-atomic'\n {%- for row in hubbard_legacy %}\n Hubbard_U({{ row.atomicSpeciesIndex }}) = {{ row.hubbardUValue }}\n {%- endfor %}\n/\n&ELECTRONS\n diagonalization = 'david'\n diago_david_ndim = 4\n diago_full_acc = .true.\n mixing_beta = 0.3\n startingwfc = 'atomic+random'\n/\n&IONS\n/\n&CELL\n/\nATOMIC_SPECIES\n{{ input.ATOMIC_SPECIES }}\nATOMIC_POSITIONS crystal\n{{ input.ATOMIC_POSITIONS }}\nCELL_PARAMETERS angstrom\n{{ input.CELL_PARAMETERS }}\nK_POINTS automatic\n{% for d in kgrid.dimensions %}{{d}} {% endfor %}{% for s in kgrid.shifts %}{{s}} {% endfor %}\n","name":"pw_scf_dft_u_legacy.in","contextProviders":[{"name":"KGridFormDataManager"},{"name":"QEPWXInputDataManager"},{"name":"PlanewaveCutoffDataManager"},{"name":"HubbardContextManagerLegacy"}],"applicationName":"espresso","executableName":"pw.x"},{"content":"&INPUT\n fildyn = 'dyn'\n zasr = 'simple'\n flfrc = 'force_constants.fc'\n/\n","name":"q2r.in","contextProviders":[],"applicationName":"espresso","executableName":"q2r.x"},{"content":"# ------------------------------------------------------------------------------- #\n# #\n# Example JupyterLab requirements #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. A Python virtual environment is created #\n# - in /scratch/$USER/$JOB_ID (for build: 'Default') #\n# - in /export/share/python/ (for build: 'with-pre-installed-packages') #\n# 3. This list is used to populate a Python virtual environment #\n# 4. JupyterLab is started #\n# #\n# For more information visit: #\n# - https://jupyterlab.readthedocs.io/en/stable/index.html #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Note: With the JupyterLab build 'with-pre-installed-packages', packages #\n# cannot be added during the notebook runtime. #\n# #\n# ------------------------------------------------------------------------------- #\n\njupyterlab==3.0.3\nnotebook>=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. 
A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. #\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 
'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. 
#\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n    # Train\n    if settings.is_workflow_running_to_train:\n        # Restore the data\n        train_target = context.load(\"train_target\")\n        train_predictions = context.load(\"train_predictions\")\n        test_target = context.load(\"test_target\")\n        test_predictions = context.load(\"test_predictions\")\n\n        # Un-transform the data\n        target_scaler = context.load(\"target_scaler\")\n        train_target = target_scaler.inverse_transform(train_target)\n        train_predictions = target_scaler.inverse_transform(train_predictions)\n        test_target = target_scaler.inverse_transform(test_target)\n        test_predictions = target_scaler.inverse_transform(test_predictions)\n\n        # Plot the data\n        plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n        if settings.is_using_train_test_split:\n            plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n        plt.xlabel(\"Actual Value\")\n        plt.ylabel(\"Predicted Value\")\n\n        # Scale the plot\n        target_range = (min(min(train_target), min(test_target)),\n                        max(max(train_target), max(test_target)))\n        predictions_range = (min(min(train_predictions), min(test_predictions)),\n                             max(max(train_predictions), max(test_predictions)))\n\n        # Combine the extremes of the targets and predictions so both axes share the same range\n        limits = (min(min(target_range), min(predictions_range)),\n                  max(max(target_range), max(predictions_range)))\n        plt.xlim(limits[0], limits[1])\n        plt.ylim(limits[0], limits[1])\n\n        # Draw a parity line, as a guide to the eye\n        plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n        plt.legend()\n\n        # Save the figure\n        plt.tight_layout()\n        plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n    # Predict\n    else:\n        # It might not make as much sense to draw a plot when predicting...\n        pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into is spanned by the first two principal #\n# components identified in PCA, which are the two vectors with #\n# the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train and test set, #\n# and color by class. 
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR <=6.2.0\nexabyte-api-client>=2020.10.19\nnumpy>=1.17.3\npandas>=1.1.4\nurllib3<2\n","name":"requirements.txt","contextProviders":[],"applicationName":"jupyterLab","executableName":"jupyter"},{"content":" start nwchem\n title \"Test\"\n charge {{ input.CHARGE }}\n geometry units au noautosym\n {{ input.ATOMIC_POSITIONS }}\n end\n basis\n * library {{ input.BASIS }}\n end\n dft\n xc {{ input.FUNCTIONAL }}\n mult {{ input.MULT }}\n end\n task dft energy\n","name":"nwchem_total_energy.inp","contextProviders":[{"name":"NWChemInputDataManager"}],"applicationName":"nwchem","executableName":"nwchem"},{"content":"# ---------------------------------------------------------------- #\n# #\n# Example python script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. runtime directory for this calculation is created #\n# 2. requirements.txt is used to create a virtual environment #\n# 3. virtual environment is activated #\n# 4. python process running this script is started #\n# #\n# Adjust the content below to include your code. 
#\n# #\n# ---------------------------------------------------------------- #\n\nimport pymatgen as mg\n\nsi = mg.Element(\"Si\")\n\nprint(si.atomic_mass)\n","name":"hello_world.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. #\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2\n","name":"requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------------ #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. 
The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# Please add any packages required for this unit below following #\n# the requirements.txt specification: #\n# https://pip.pypa.io/en/stable/reference/requirements-file-format/ #\n# ------------------------------------------------------------------ #\n","name":"requirements_empty.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# -------------------------------------------------------------------------------\n# This script contains a few helpful commands for basic plotting with matplotlib.\n# The commented out blocks are optional suggestions and included for convenience.\n# -------------------------------------------------------------------------------\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\n\n\n# Plot Settings\n# -------------\nfigure_size = (6.4, 4.8) # width, height [inches]\ndpi = 100 # resolution [dots-per-inch]\nfont_size_title = 16 # font size of title\nfont_size_axis = 12 # font size of axis label\nfont_size_tick = 12 # font size of tick label\nfont_size_legend = 14 # font size of legend\nx_axis_label = None # label for x-axis\ny_axis_label = None # label for y-axis\ntitle = None # figure title\nshow_legend = False # whether to show legend\nsave_name = \"plot.pdf\" # output filename (with suffix), e.g. 'plot.pdf'\nx_view_limits = {\"left\": None, \"right\": None} # view limits for x-axis\ny_view_limits = {\"top\": None, \"bottom\": None} # view limits for y-axis\nx_tick_spacing = None # custom tick spacing for x-axis (optional)\ny_tick_spacing = None # custom tick spacing for y-axis (optional)\nx_tick_labels = None # custom tick labels for x-axis (optional)\ny_tick_labels = None # custom tick labels for y-axis (optional)\n\n\n# Figure & axes objects\n# ---------------------\nfig = plt.figure(figsize=figure_size, dpi=dpi)\nax = fig.add_subplot(111)\n\n# Example plot (REPLACE ACCORDINGLY)\n# ------------\nx = np.linspace(0, 7, num=100)\ny = np.sin(x)\nax.plot(x, y, \"g-\", zorder=3)\n\n\n# Help lines\n# ----------\n# ax.axhline(y=0, color=\"0.25\", linewidth=0.6, zorder=1)\n# ax.axvline(x=0, color=\"0.25\", linewidth=0.6, zorder=1)\n\n\n# View limits\n# -----------\nax.set_xlim(**x_view_limits)\nax.set_ylim(**y_view_limits)\n\n\n# Grid lines\n# ----------\n# grid_style = {\n# \"linestyle\" : \"dotted\",\n# \"linewidth\" : 0.6,\n# \"color\" : \"0.25\",\n# }\n# ax.grid(**grid_style)\n\n# Custom tick spacing\n# -------------------\n# ax.xaxis.set_major_locator(ticker.MultipleLocator(x_tick_spacing))\n# ax.yaxis.set_major_locator(ticker.MultipleLocator(y_tick_spacing))\n\n# Custom tick labels\n# ------------------\nif x_tick_labels is not None:\n ax.set_xticklabels(x_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\nif y_tick_labels is not None:\n ax.set_yticklabels(y_tick_labels, fontdict={\"fontsize\": font_size_tick}, minor=False)\n\n# Other tick settings\n# -------------------\n# ax.tick_params(axis=\"both\", which=\"major\", labelsize=font_size_tick, direction=\"in\")\n# ax.tick_params(axis=\"x\", which=\"major\", pad=10)\n# ax.tick_params(axis=\"x\", which=\"minor\", bottom=False, top=False)\n\n\n# Axis labels\n# -----------\nif x_axis_label is not None:\n ax.set_xlabel(x_axis_label, size=font_size_axis)\nif y_axis_label is not None:\n 
ax.set_ylabel(y_axis_label, size=font_size_axis)\n\n# Figure title\n# ------------\nif title is not None:\n ax.set_title(title, fontsize=font_size_title)\n\n# Legend\n# ------\nif show_legend:\n ax.legend(prop={'size': font_size_legend})\n\n# Save figure\n# -----------\nif save_name is not None:\n save_format = save_name.split(\".\")[-1]\n fig.savefig(save_name, format=save_format, bbox_inches=\"tight\")\n","name":"matplotlib_basic.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ---------------------------------------------------------- #\n# #\n# This script extracts q-points and irreducible #\n# representations from Quantum ESPRESSO xml data. #\n# #\n# Expects control_ph.xml and patterns.?.xml files to exist #\n# #\n# ---------------------------------------------------------- #\nfrom __future__ import print_function\nimport json\nfrom xml.dom import minidom\n\n{# JOB_WORK_DIR will be initialized at runtime => avoid substituion below #}\n{%- raw -%}\nCONTROL_PH_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/control_ph.xml\"\nPATTERNS_FILENAME = \"{{JOB_WORK_DIR}}/outdir/_ph0/__prefix__.phsave/patterns.{}.xml\"\n{%- endraw -%}\n\n# get integer content of an xml tag in a document\ndef get_int_by_tag_name(doc, tag_name):\n element = doc.getElementsByTagName(tag_name)\n return int(element[0].firstChild.nodeValue)\n\nvalues = []\n\n# get number of q-points and cycle through them\nxmldoc = minidom.parse(CONTROL_PH_FILENAME)\nnumber_of_qpoints = get_int_by_tag_name(xmldoc, \"NUMBER_OF_Q_POINTS\")\n\nfor i in range(number_of_qpoints):\n # get number of irreducible representations per qpoint\n xmldoc = minidom.parse(PATTERNS_FILENAME.format(i+1))\n number_of_irr_per_qpoint = get_int_by_tag_name(xmldoc, \"NUMBER_IRR_REP\")\n # add each distinct combination of qpoint and irr as a separate entry\n for j in range(number_of_irr_per_qpoint):\n values.append({\n \"qpoint\": i + 1,\n \"irr\": j + 1\n })\n\n# store final values in standard output (STDOUT)\nprint(json.dumps(values, indent=4))\n","name":"espresso_xml_get_qpt_irr.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"import re\nimport json\n\ndouble_regex = r'[-+]?\\d*\\.\\d+(?:[eE][-+]?\\d+)?'\nregex = r\"\\s+k\\(\\s+\\d*\\)\\s+=\\s+\\(\\s+({0})\\s+({0})\\s+({0})\\),\\s+wk\\s+=\\s+({0}).+?\\n\".format(double_regex)\n\nwith open(\"pw_scf.out\") as f:\n text = f.read()\n\npattern = re.compile(regex, re.I | re.MULTILINE)\nmatch = pattern.findall(text[text.rfind(\" cryst. coord.\"):])\nkpoints = [{\"coordinates\": list(map(float, m[:3])), \"weight\": float(m[3])} for m in match]\nprint(json.dumps({\"name\": \"KPOINTS\", \"value\": kpoints, \"scope\": \"global\"}, indent=4))\n","name":"espresso_extract_kpoints.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------- #\n# This script aims to determine extrema for a given array. #\n# Please adjust the parameters according to your data. #\n# Note: This template expects the array to be defined in the #\n# context as 'array_from_context' (see details below). 
#\n# ----------------------------------------------------------- #\nimport numpy as np\nfrom scipy.signal import find_peaks\nimport json\nfrom munch import Munch\n\n# Data From Context\n# -----------------\n# The array 'array_from_context' is a 1D list (float or int) that has to be defined in\n# a preceding assignment unit in order to be extracted from the context.\n# Example: [0.0, 1.0, 4.0, 3.0]\n# Upon rendering the following Jinja template the extracted array will be inserted.\n{% raw %}Y = np.array({{array_from_context}}){% endraw %}\n\n# Settings\n# --------\nprominence = 0.3 # required prominence in the unit of the data array\n\n# Find Extrema\n# ------------\nmax_indices, _ = find_peaks(Y, prominence=prominence)\nmin_indices, _ = find_peaks(-1 * Y, prominence=prominence)\n\nresult = {\n \"maxima\": Y[max_indices].tolist(),\n \"minima\": Y[min_indices].tolist(),\n}\n\n# print final values to standard output (STDOUT),\n# so that they can be read by a subsequent assignment unit (using value=STDOUT)\nprint(json.dumps(result, indent=4))\n","name":"find_extrema.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Example Python package requirements for the Mat3ra platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# ----------------------------------------------------------------- #\n\n\nmunch==2.5.0\nnumpy>=1.19.5\nscipy>=1.5.4\nmatplotlib>=3.0.0\n","name":"processing_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# PythonML Package Requirements for use on the Exabyte.io Platform #\n# #\n# Will be used as follows: #\n# #\n# 1. A runtime directory for this calculation is created #\n# 2. This list is used to populate a Python virtual environment #\n# 3. The virtual environment is activated #\n# 4. The Python process running the script included within this #\n# job is started #\n# #\n# For more information visit: #\n# - https://pip.pypa.io/en/stable/reference/pip_install #\n# - https://virtualenv.pypa.io/en/stable/ #\n# #\n# The package set below is a stable working set of pymatgen and #\n# all of its dependencies. Please adjust the list to include #\n# your preferred packages. 
#\n# #\n# ----------------------------------------------------------------- #\n\n# Python 3 packages\ncertifi==2020.12.5\nchardet==4.0.0\ncycler==0.10.0\ndecorator==4.4.2\nfuture==0.18.2\nidna==2.10\nkiwisolver==1.3.1\nmatplotlib==3.3.4\nmonty==4.0.2\nmpmath==1.2.1\nnetworkx==2.5\nnumpy==1.19.5\npalettable==3.3.0\npandas==1.1.5\nPillow==8.1.0\nplotly==4.14.3\npymatgen==2021.2.8.1\npyparsing==2.4.7\npython-dateutil==2.8.1\npytz==2021.1\nrequests==2.25.1\nretrying==1.3.3\nruamel.yaml==0.16.12\nruamel.yaml.clib==0.2.2\nscikit-learn==0.24.1\nscipy==1.5.4\nsix==1.15.0\nspglib==1.16.1\nsympy==1.7.1\ntabulate==0.8.7\nuncertainties==3.1.5\nurllib3==1.26.3\nxgboost==1.4.2;python_version>=\"3.6\"\n","name":"pyml_requirements.txt","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# General settings for PythonML jobs on the Exabyte.io Platform #\n# #\n# This file generally shouldn't be modified directly by users. #\n# The \"datafile\" and \"is_workflow_running_to_predict\" variables #\n# are defined in the head subworkflow, and are templated into #\n# this file. This helps facilitate the workflow's behavior #\n# differing whether it is in a \"train\" or \"predict\" mode. #\n# #\n# Also in this file is the \"Context\" object, which helps maintain #\n# certain Python objects between workflow units, and between #\n# predict runs. #\n# #\n# Whenever a python object needs to be stored for subsequent runs #\n# (such as in the case of a trained model), context.save() can be #\n# called to save it. The object can then be loaded again by using #\n# context.load(). #\n# ----------------------------------------------------------------- #\n\n\nimport pickle, os\n\n# ==================================================\n# Variables modified in the Important Settings menu\n# ==================================================\n# Variables in this section can (and oftentimes need to) be modified by the user in the \"Important Settings\" tab\n# of a workflow.\n\n# Target_column_name is used during training to identify the variable the model is traing to predict.\n# For example, consider a CSV containing three columns, \"Y\", \"X1\", and \"X2\". If the goal is to train a model\n# that will predict the value of \"Y,\" then target_column_name would be set to \"Y\"\ntarget_column_name = \"{{ mlSettings.target_column_name }}\"\n\n# The type of ML problem being performed. Can be either \"regression\", \"classification,\" or \"clustering.\"\nproblem_category = \"{{ mlSettings.problem_category }}\"\n\n# =============================\n# Non user-modifiable variables\n# =============================\n# Variables in this section generally do not need to be modified.\n\n# The problem category, regression or classification or clustering. In regression, the target (predicted) variable\n# is continues. In classification, it is categorical. In clustering, there is no target - a set of labels is\n# automatically generated.\nis_regression = is_classification = is_clustering = False\nif problem_category.lower() == \"regression\":\n is_regression = True\nelif problem_category.lower() == \"classification\":\n is_classification = True\nelif problem_category.lower() == \"clustering\":\n is_clustering = True\nelse:\n raise ValueError(\n \"Variable 'problem_category' must be either 'regression', 'classification', or 'clustering'. 
Check settings.py\")\n\n# The variables \"is_workflow_running_to_predict\" and \"is_workflow_running_to_train\" are used to control whether\n# the workflow is in a \"training\" mode or a \"prediction\" mode. The \"IS_WORKFLOW_RUNNING_TO_PREDICT\" variable is set by\n# an assignment unit in the \"Set Up the Job\" subworkflow that executes at the start of the job. It is automatically\n# changed when the predict workflow is generated, so users should not need to modify this variable.\nis_workflow_running_to_predict = {% raw %}{{IS_WORKFLOW_RUNNING_TO_PREDICT}}{% endraw %}\nis_workflow_running_to_train = not is_workflow_running_to_predict\n\n# Sets the datafile variable. The \"datafile\" is the data that will be read in, and will be used by subsequent\n# workflow units for either training or prediction, depending on the workflow mode.\nif is_workflow_running_to_predict:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\nelse:\n datafile = \"{% raw %}{{DATASET_BASENAME}}{% endraw %}\"\n\n# The \"Context\" class allows for data to be saved and loaded between units, and between train and predict runs.\n# Variables which have been saved using the \"Save\" method are written to disk, and the predict workflow is automatically\n# configured to obtain these files when it starts.\n#\n# IMPORTANT NOTE: Do *not* adjust the value of \"context_dir_pathname\" in the Context object. If the value is changed, then\n# files will not be correctly copied into the generated predict workflow. This will cause the predict workflow to be\n# generated in a broken state, and it will not be able to make any predictions.\nclass Context(object):\n \"\"\"\n Saves and loads objects from the disk, useful for preserving data between workflow units\n\n Attributes:\n context_paths (dict): Dictionary of the format {variable_name: path}, that governs where\n pickle saves files.\n\n Methods:\n save: Used to save objects to the context directory\n load: Used to load objects from the context directory\n \"\"\"\n\n def __init__(self, context_file_basename=\"workflow_context_file_mapping\"):\n \"\"\"\n Constructor for Context objects\n\n Args:\n context_file_basename (str): Name of the file to store context paths in\n \"\"\"\n\n # Warning: DO NOT modify the context_dir_pathname variable below\n # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n context_dir_pathname = \"{% raw %}{{ CONTEXT_DIR_RELATIVE_PATH }}{% endraw %}\"\n # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n self._context_dir_pathname = context_dir_pathname\n self._context_file = os.path.join(context_dir_pathname, context_file_basename)\n\n # Make context dir if it does not exist\n if not os.path.exists(context_dir_pathname):\n os.makedirs(context_dir_pathname)\n\n # Read in the context sources dictionary, if it exists\n if os.path.exists(self._context_file):\n with open(self._context_file, \"rb\") as file_handle:\n self.context_paths: dict = pickle.load(file_handle)\n else:\n # Items is a dictionary of {varname: path}\n self.context_paths = {}\n\n def __enter__(self):\n return self\n\n def __exit__(self, exc_type, exc_value, traceback):\n self._update_context()\n\n def __contains__(self, item):\n return item in self.context_paths\n\n def _update_context(self):\n with open(self._context_file, \"wb\") as file_handle:\n pickle.dump(self.context_paths, file_handle)\n\n def load(self, name: str):\n \"\"\"\n Returns a contextd object\n\n Args:\n name (str): The name in self.context_paths of the object\n \"\"\"\n path = 
self.context_paths[name]\n with open(path, \"rb\") as file_handle:\n obj = pickle.load(file_handle)\n return obj\n\n def save(self, obj: object, name: str):\n \"\"\"\n Saves an object to disk using pickle\n\n Args:\n name (str): Friendly name for the object, used for lookup in load() method\n obj (object): Object to store on disk\n \"\"\"\n path = os.path.join(self._context_dir_pathname, f\"{name}.pkl\")\n self.context_paths[name] = path\n with open(path, \"wb\") as file_handle:\n pickle.dump(obj, file_handle)\n self._update_context()\n\n# Generate a context object, so that the \"with settings.context\" can be used by other units in this workflow.\ncontext = Context()\n\nis_using_train_test_split = \"is_using_train_test_split\" in context and (context.load(\"is_using_train_test_split\"))\n\n# Create a Class for a DummyScaler()\nclass DummyScaler:\n \"\"\"\n This class is a 'DummyScaler' which trivially acts on data by returning it unchanged.\n \"\"\"\n\n def fit(self, X):\n return self\n\n def transform(self, X):\n return X\n\n def fit_transform(self, X):\n return X\n\n def inverse_transform(self, X):\n return X\n\nif 'target_scaler' not in context:\n context.save(DummyScaler(), 'target_scaler')\n","name":"pyml_settings.py","contextProviders":[{"name":"MLSettingsDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Custom workflow unit template for the Exabyte.io platform #\n# #\n# This file imports a set of workflow-specific context variables #\n# from settings.py. It then uses a context manager to save and #\n# load Python objects. When saved, these objects can then be #\n# loaded either later in the same workflow, or by subsequent #\n# predict jobs. #\n# #\n# Any pickle-able Python object can be saved using #\n# settings.context. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport settings\n\n# The context manager exists to facilitate\n# saving and loading objects across Python units within a workflow.\n\n# To load an object, simply do to \\`context.load(\"name-of-the-saved-object\")\\`\n# To save an object, simply do \\`context.save(\"name-for-the-object\", object_here)\\`\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Do some transformations to the data here\n\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n descriptors = context.load(\"descriptors\")\n\n # Do some predictions or transformation to the data here\n","name":"pyml_custom.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to read in data for the ML workflow. #\n# #\n# Also showcased here is the concept of branching based on #\n# whether the workflow is in \"train\" or \"predict\" mode. #\n# #\n# If the workflow is in \"training\" mode, it will read in the data #\n# before converting it to a Numpy array and save it for use #\n# later. 
During training, we already have values for the output, #\n# and this gets saved to \"target.\" #\n# #\n# Finally, whether the workflow is in training or predict mode, #\n# it will always read in a set of descriptors from a datafile #\n# defined in settings.py #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport sklearn.preprocessing\nimport settings\n\nwith settings.context as context:\n data = pandas.read_csv(settings.datafile)\n\n # Train\n # By default, we don't do train/test splitting: the train and test represent the same dataset at first.\n # Other units (such as a train/test splitter) down the line can adjust this as-needed.\n if settings.is_workflow_running_to_train:\n\n # Handle the case where we are clustering\n if settings.is_clustering:\n target = data.to_numpy()[:, 0] # Just get the first column, it's not going to get used anyway\n else:\n target = data.pop(settings.target_column_name).to_numpy()\n\n # Handle the case where we are classifying. In this case, we must convert any labels provided to be categorical.\n # Specifically, labels are encoded with values between 0 and (N_Classes - 1)\n if settings.is_classification:\n label_encoder = sklearn.preprocessing.LabelEncoder()\n target = label_encoder.fit_transform(target)\n context.save(label_encoder, \"label_encoder\")\n\n target = target.reshape(-1, 1) # Reshape array from a row vector into a column vector\n\n context.save(target, \"train_target\")\n context.save(target, \"test_target\")\n\n descriptors = data.to_numpy()\n\n context.save(descriptors, \"train_descriptors\")\n context.save(descriptors, \"test_descriptors\")\n\n else:\n descriptors = data.to_numpy()\n context.save(descriptors, \"descriptors\")\n","name":"data_input_read_csv_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow Unit to perform a train/test split #\n# #\n# Splits the dataset into a training and testing set. The #\n# variable `percent_held_as_test` controls how much of the #\n# input dataset is removed for use as a testing set. By default, #\n# this unit puts 20% of the dataset into the testing set, and #\n# places the remaining 80% into the training set. #\n# #\n# Does nothing in the case of predictions. #\n# #\n# ----------------------------------------------------------------- #\n\nimport sklearn.model_selection\nimport numpy as np\nimport settings\n\n# `percent_held_as_test` is the amount of the dataset held out as the testing set. If it is set to 0.2,\n# then 20% of the dataset is held out as a testing set. 
The remaining 80% is the training set.\npercent_held_as_test = {{ mlTrainTestSplit.fraction_held_as_test_set }}\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Load training data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n\n # Combine datasets to facilitate train/test split\n\n # Do train/test split\n train_descriptors, test_descriptors, train_target, test_target = sklearn.model_selection.train_test_split(\n train_descriptors, train_target, test_size=percent_held_as_test)\n\n # Set the flag for using a train/test split\n context.save(True, \"is_using_train_test_split\")\n\n # Save training data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"data_input_train_test_split_sklearn.py","contextProviders":[{"name":"MLTrainTestSplitDataManager"}],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn MinMax Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it is on interval #\n# [0,1]. It then saves the data for use further down #\n# the road in the workflow, for use in un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the min and max of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. #\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor MinMax Scaler\n scaler = sklearn.preprocessing.MinMaxScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_min_max_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Duplicates workflow unit 
#\n# #\n# This workflow unit drops all duplicate rows, if it is running #\n# in the \"train\" mode. #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop duplicates from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n df = df.drop_duplicates()\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop duplicates from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.drop_duplicates()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_duplicates_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Pandas Remove Missing Workflow Unit #\n# #\n# This workflow unit allows missing rows and/or columns to be #\n# dropped from the dataset by configuring the `to_drop` #\n# parameter. 
#\n# #\n# Valid values for `to_drop`: #\n# - \"rows\": rows with missing values will be removed #\n# - \"columns\": columns with missing values will be removed #\n# - \"both\": rows and columns with missing values will be removed #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport pandas\nimport settings\n\n# `to_drop` can either be \"rows\" or \"columns\"\n# If it is set to \"rows\" (by default), then all rows with missing values will be dropped.\n# If it is set to \"columns\", then all columns with missing values will be dropped.\n# If it is set to \"both\", then all rows and columns with missing values will be dropped.\nto_drop = \"rows\"\n\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Drop missing from the training set\n df = pandas.DataFrame(train_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(train_descriptors))\n\n directions = {\n \"rows\": (\"index\",),\n \"columns\": (\"columns\",),\n \"both\": (\"index\", \"columns\"),\n }[to_drop]\n for direction in directions:\n df = df.dropna(direction)\n\n train_target = df.pop(\"target\").to_numpy()\n train_target = train_target.reshape(-1, 1)\n train_descriptors = df.to_numpy()\n\n # Drop missing from the testing set\n df = pandas.DataFrame(test_target, columns=[\"target\"])\n df = df.join(pandas.DataFrame(test_descriptors))\n df = df.dropna()\n test_target = df.pop(\"target\").to_numpy()\n test_target = test_target.reshape(-1, 1)\n test_descriptors = df.to_numpy()\n\n # Store the data\n context.save(train_target, \"train_target\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_target, \"test_target\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Predict\n else:\n pass\n","name":"pre_processing_remove_missing_pandas.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Sklearn Standard Scaler workflow unit #\n# #\n# This workflow unit scales the data such that it a mean of 0 and #\n# a standard deviation of 1. It then saves the data for use #\n# further down the road in the workflow, for use in #\n# un-transforming the data. #\n# #\n# It is important that new predictions are made by scaling the #\n# new inputs using the mean and variance of the original training #\n# set. As a result, the scaler gets saved in the Training phase. #\n# #\n# During a predict workflow, the scaler is loaded, and the #\n# new examples are scaled using the stored scaler. 
#\n# ----------------------------------------------------------------- #\n\n\nimport sklearn.preprocessing\n\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Descriptor Scaler\n scaler = sklearn.preprocessing.StandardScaler\n descriptor_scaler = scaler()\n train_descriptors = descriptor_scaler.fit_transform(train_descriptors)\n test_descriptors = descriptor_scaler.transform(test_descriptors)\n context.save(descriptor_scaler, \"descriptor_scaler\")\n context.save(train_descriptors, \"train_descriptors\")\n context.save(test_descriptors, \"test_descriptors\")\n\n # Our target is only continuous if it's a regression problem\n if settings.is_regression:\n target_scaler = scaler()\n train_target = target_scaler.fit_transform(train_target)\n test_target = target_scaler.transform(test_target)\n context.save(target_scaler, \"target_scaler\")\n context.save(train_target, \"train_target\")\n context.save(test_target, \"test_target\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Get the scaler\n descriptor_scaler = context.load(\"descriptor_scaler\")\n\n # Scale the data\n descriptors = descriptor_scaler.transform(descriptors)\n\n # Store the data\n context.save(descriptors, \"descriptors\")\n","name":"pre_processing_standardization_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge-regression model in Scikit-Learn. #\n# Alpha is taken from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.AdaBoostRegressor(\n n_estimators=50,\n learning_rate=1,\n loss=\"linear\",\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"adaboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"adaboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_adaboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a bagged trees regression model with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.tree\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Base Estimator\n base_estimator = sklearn.tree.DecisionTreeRegressor(\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n ccp_alpha=0.0,\n )\n\n # Initialize the Model\n model = sklearn.ensemble.BaggingRegressor(\n n_estimators=10,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n verbose=0,\n base_estimator=base_estimator,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"bagged_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"bagged_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_bagged_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for gradient-boosted tree regression with #\n# Scikit-Learn. Parameters for the estimator and ensemble are #\n# derived from Scikit-Learn's Defaults. Note: In the gradient- #\n# boosted trees ensemble used, the weak learners used as #\n# estimators cannot be tuned with the same level of fidelity #\n# allowed in the adaptive-boosted trees ensemble. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.GradientBoostingRegressor(\n loss=\"ls\",\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion=\"friedman_mse\",\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n max_features=None,\n alpha=0.9,\n verbose=0,\n max_leaf_nodes=None,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees regression #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBRegressor(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='reg:squarederror',\n eval_metric='rmse')\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_regression\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_regression\")\n\n # Make some predictions and unscale\n predictions = model.predict(descriptors)\n predictions = predictions.reshape(-1, 1)\n target_scaler = context.load(\"target_scaler\")\n\n predictions = target_scaler.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\")\n","name":"model_extreme_gradboosted_trees_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for k-means clustering. #\n# #\n# In k-means clustering, the labels are not provided ahead of #\n# time. Instead, one supplies the number of groups the #\n# algorithm should split the dataset into. Here, we set our #\n# own default of 4 groups (fewer than sklearn's default of 8). #\n# Otherwise, the default parameters of the clustering method #\n# are the same as in sklearn. 
#\n# ------------------------------------------------------------ #\n\n\nimport sklearn.cluster\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Initialize the Model\n model = sklearn.cluster.KMeans(\n n_clusters=4,\n init=\"k-means++\",\n n_init=10,\n max_iter=300,\n tol=0.0001,\n copy_x=True,\n algorithm=\"auto\",\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors)\n context.save(model, \"k_means\")\n train_labels = model.predict(train_descriptors)\n test_labels = model.predict(test_descriptors)\n\n context.save(train_labels, \"train_labels\")\n context.save(test_labels, \"test_labels\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"k_means\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_k_means_clustering_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a kernelized ridge-regression model with #\n# Scikit-Learn. Model parameters are derived from Scikit- #\n# Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.kernel_ridge\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.kernel_ridge.KernelRidge(\n alpha=1.0,\n kernel=\"linear\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"kernel_ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"kernel_ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_kernel_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a LASSO-regression model with Scikit- #\n# Learn. Model parameters derived from Scikit-Learn's #\n# Defaults. Alpha has been lowered from the default of 1.0, to #\n# 0.1. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Lasso(\n alpha=0.1,\n fit_intercept=True,\n normalize=False,\n precompute=False,\n tol=0.0001,\n positive=True,\n selection=\"cyclic\",\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"LASSO\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"LASSO\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_lasso_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit to train a simple feedforward neural network #\n# model on a regression problem using scikit-learn. In this #\n# template, we use the default values for hidden_layer_sizes, #\n# activation, solver, and learning rate. Other parameters are #\n# available (consult the sklearn docs), but in this case, we #\n# only include those relevant to the Adam optimizer. Sklearn #\n# Docs: Sklearn docs:http://scikit-learn.org/stable/modules/ge #\n# nerated/sklearn.neural_network.MLPRegressor.html #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.neural_network\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.neural_network.MLPRegressor(\n hidden_layer_sizes=(100,),\n activation=\"relu\",\n solver=\"adam\",\n max_iter=300,\n early_stopping=False,\n validation_fraction=0.1,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"multilayer_perceptron\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"multilayer_perceptron\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_mlp_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a random forest classification model with #\n# Scikit-Learn. Parameters derived from Scikit-Learn's #\n# defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
When #\n# the workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a filee named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestClassifier(\n n_estimators=100,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n oob_score=False,\n verbose=0,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target, test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for a gradient boosted classification model with #\n# Scikit-Learn. Parameters derived from sklearn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = sklearn.ensemble.GradientBoostingClassifier(loss='deviance',\n learning_rate=0.1,\n n_estimators=100,\n subsample=1.0,\n criterion='friedman_mse',\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_depth=3,\n min_impurity_decrease=0.0,\n min_impurity_split=None,\n init=None,\n random_state=None,\n max_features=None,\n verbose=0,\n max_leaf_nodes=None,\n warm_start=False,\n validation_fraction=0.1,\n n_iter_no_change=None,\n tol=0.0001,\n ccp_alpha=0.0)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"gradboosted_trees_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"gradboosted_trees_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Workflow unit for eXtreme Gradient-Boosted trees classification #\n# with XGBoost's wrapper to Scikit-Learn. Parameters for the #\n# estimator and ensemble are derived from sklearn defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the confusion matrix. 
#\n# #\n# When the workflow is run in Predict mode, the model is #\n# loaded, predictions are made, they are un-transformed using #\n# the trained scaler from the training run, and they are #\n# written to a filed named \"predictions.csv\" #\n# ----------------------------------------------------------------- #\n\nimport xgboost\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_target = context.load(\"test_target\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the model\n model = xgboost.XGBClassifier(booster='gbtree',\n verbosity=1,\n learning_rate=0.3,\n min_split_loss=0,\n max_depth=6,\n min_child_weight=1,\n max_delta_step=0,\n colsample_bytree=1,\n reg_lambda=1,\n reg_alpha=0,\n scale_pos_weight=1,\n objective='binary:logistic',\n eval_metric='logloss',\n use_label_encoder=False)\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"extreme_gradboosted_tree_classification\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Save the probabilities of the model\n\n test_probabilities = model.predict_proba(test_descriptors)\n context.save(test_probabilities, \"test_probabilities\")\n\n # Print some information to the screen for the regression problem\n confusion_matrix = sklearn.metrics.confusion_matrix(test_target,\n test_predictions)\n print(\"Confusion Matrix:\")\n print(confusion_matrix)\n context.save(confusion_matrix, \"confusion_matrix\")\n\n # Ensure predictions have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"extreme_gradboosted_tree_classification\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Transform predictions back to their original labels\n label_encoder: sklearn.preprocessing.LabelEncoder = context.load(\"label_encoder\")\n predictions = label_encoder.inverse_transform(predictions)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_extreme_gradboosted_trees_classification_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow for a random forest regression model with Scikit- #\n# Learn. Parameters are derived from Scikit-Learn's defaults. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.ensemble\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.ensemble.RandomForestRegressor(\n n_estimators=100,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.0,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.0,\n bootstrap=True,\n max_samples=None,\n oob_score=False,\n ccp_alpha=0.0,\n verbose=0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"random_forest\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"random_forest\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_random_forest_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ------------------------------------------------------------ #\n# Workflow unit for a ridge regression model with Scikit- #\n# Learn. Alpha is taken from Scikit-Learn's default #\n# parameters. #\n# #\n# When then workflow is in Training mode, the model is trained #\n# and then it is saved, along with the RMSE and some #\n# predictions made using the training data (e.g. for use in a #\n# parity plot or calculation of other error metrics). 
When the #\n# workflow is run in Predict mode, the model is loaded, #\n# predictions are made, they are un-transformed using the #\n# trained scaler from the training run, and they are written #\n# to a file named \"predictions.csv\" #\n# ------------------------------------------------------------ #\n\n\nimport sklearn.linear_model\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_target = context.load(\"train_target\")\n test_target = context.load(\"test_target\")\n train_descriptors = context.load(\"train_descriptors\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Flatten the targets\n train_target = train_target.flatten()\n test_target = test_target.flatten()\n\n # Initialize the Model\n model = sklearn.linear_model.Ridge(\n alpha=1.0,\n )\n\n # Train the model and save\n model.fit(train_descriptors, train_target)\n context.save(model, \"ridge\")\n train_predictions = model.predict(train_descriptors)\n test_predictions = model.predict(test_descriptors)\n\n # Scale predictions so they have the same shape as the saved target\n train_predictions = train_predictions.reshape(-1, 1)\n test_predictions = test_predictions.reshape(-1, 1)\n\n # Scale for RMSE calc on the test set\n target_scaler = context.load(\"target_scaler\")\n\n # Unflatten the target\n test_target = test_target.reshape(-1, 1)\n y_true = target_scaler.inverse_transform(test_target)\n y_pred = target_scaler.inverse_transform(test_predictions)\n\n # RMSE\n mse = sklearn.metrics.mean_squared_error(y_true, y_pred)\n rmse = np.sqrt(mse)\n print(f\"RMSE = {rmse}\")\n context.save(rmse, \"RMSE\")\n\n context.save(train_predictions, \"train_predictions\")\n context.save(test_predictions, \"test_predictions\")\n\n # Predict\n else:\n # Restore data\n descriptors = context.load(\"descriptors\")\n\n # Restore model\n model = context.load(\"ridge\")\n\n # Make some predictions\n predictions = model.predict(descriptors)\n\n # Save the predictions to file\n np.savetxt(\"predictions.csv\", predictions, header=\"prediction\", comments=\"\", fmt=\"%s\")\n","name":"model_ridge_regression_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Parity plot generation unit #\n# #\n# This unit generates a parity plot based on the known values #\n# in the training data, and the predicted values generated #\n# using the training data. #\n# #\n# Because this metric compares predictions versus a ground truth, #\n# it doesn't make sense to generate the plot when a predict #\n# workflow is being run (because in that case, we generally don't #\n# know the ground truth for the values being predicted). Hence, #\n# this unit does nothing if the workflow is in \"predict\" mode. 
#\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\n\nimport settings\n\nwith settings.context as context:\n    # Train\n    if settings.is_workflow_running_to_train:\n        # Restore the data\n        train_target = context.load(\"train_target\")\n        train_predictions = context.load(\"train_predictions\")\n        test_target = context.load(\"test_target\")\n        test_predictions = context.load(\"test_predictions\")\n\n        # Un-transform the data\n        target_scaler = context.load(\"target_scaler\")\n        train_target = target_scaler.inverse_transform(train_target)\n        train_predictions = target_scaler.inverse_transform(train_predictions)\n        test_target = target_scaler.inverse_transform(test_target)\n        test_predictions = target_scaler.inverse_transform(test_predictions)\n\n        # Plot the data\n        plt.scatter(train_target, train_predictions, c=\"#203d78\", label=\"Training Set\")\n        if settings.is_using_train_test_split:\n            plt.scatter(test_target, test_predictions, c=\"#67ac5b\", label=\"Testing Set\")\n        plt.xlabel(\"Actual Value\")\n        plt.ylabel(\"Predicted Value\")\n\n        # Scale the plot so both axes span the full range of targets and predictions\n        target_range = (min(min(train_target), min(test_target)),\n                        max(max(train_target), max(test_target)))\n        predictions_range = (min(min(train_predictions), min(test_predictions)),\n                             max(max(train_predictions), max(test_predictions)))\n\n        limits = (min(min(target_range), min(predictions_range)),\n                  max(max(target_range), max(predictions_range)))\n        plt.xlim(limits[0], limits[1])\n        plt.ylim(limits[0], limits[1])\n\n        # Draw a parity line, as a guide to the eye\n        plt.plot((limits[0], limits[1]), (limits[0], limits[1]), c=\"black\", linestyle=\"dotted\", label=\"Parity\")\n        plt.legend()\n\n        # Save the figure\n        plt.tight_layout()\n        plt.savefig(\"my_parity_plot.png\", dpi=600)\n\n    # Predict\n    else:\n        # It might not make as much sense to draw a plot when predicting...\n        pass\n","name":"post_processing_parity_plot_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# Cluster Visualization #\n# #\n# This unit takes an N-dimensional feature space, and uses #\n# Principal-component Analysis (PCA) to project into a 2D space #\n# to facilitate plotting on a scatter plot. #\n# #\n# The 2D space we project into is spanned by the first two #\n# principal components identified in PCA, which are the two #\n# vectors with the highest variance. #\n# #\n# Wikipedia Article on PCA: #\n# https://en.wikipedia.org/wiki/Principal_component_analysis #\n# #\n# We then plot the labels assigned to the train and test sets, #\n# and color by class.
#\n# #\n# ----------------------------------------------------------------- #\n\nimport pandas as pd\nimport matplotlib.cm\nimport matplotlib.lines\nimport matplotlib.pyplot as plt\nimport sklearn.decomposition\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n train_labels = context.load(\"train_labels\")\n train_descriptors = context.load(\"train_descriptors\")\n test_labels = context.load(\"test_labels\")\n test_descriptors = context.load(\"test_descriptors\")\n\n # Unscale the descriptors\n descriptor_scaler = context.load(\"descriptor_scaler\")\n train_descriptors = descriptor_scaler.inverse_transform(train_descriptors)\n test_descriptors = descriptor_scaler.inverse_transform(test_descriptors)\n\n # We need at least 2 dimensions, exit if the dataset is 1D\n if train_descriptors.ndim < 2:\n raise ValueError(\"The train descriptors do not have enough dimensions to be plot in 2D\")\n\n # The data could be multidimensional. Let's do some PCA to get things into 2 dimensions.\n pca = sklearn.decomposition.PCA(n_components=2)\n train_descriptors = pca.fit_transform(train_descriptors)\n test_descriptors = pca.transform(test_descriptors)\n xlabel = \"Principle Component 1\"\n ylabel = \"Principle Component 2\"\n\n # Determine the labels we're going to be using, and generate their colors\n labels = set(train_labels)\n colors = {}\n for count, label in enumerate(labels):\n cm = matplotlib.cm.get_cmap('jet', len(labels))\n color = cm(count / len(labels))\n colors[label] = color\n train_colors = [colors[label] for label in train_labels]\n test_colors = [colors[label] for label in test_labels]\n\n # Train / Test Split Visualization\n plt.title(\"Train Test Split Visualization\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=\"#33548c\", marker=\"o\", label=\"Training Set\")\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=\"#F0B332\", marker=\"o\", label=\"Testing Set\")\n xmin, xmax, ymin, ymax = plt.axis()\n plt.legend()\n plt.tight_layout()\n plt.savefig(\"train_test_split.png\", dpi=600)\n plt.close()\n\n def clusters_legend(cluster_colors):\n \"\"\"\n Helper function that creates a legend, given the coloration by clusters.\n Args:\n cluster_colors: A dictionary of the form {cluster_number : color_value}\n\n Returns:\n None; just creates the legend and puts it on the plot\n \"\"\"\n legend_symbols = []\n for group, color in cluster_colors.items():\n label = f\"Cluster {group}\"\n legend_symbols.append(matplotlib.lines.Line2D([], [], color=color, marker=\"o\",\n linewidth=0, label=label))\n plt.legend(handles=legend_symbols)\n\n # Training Set Clusters\n plt.title(\"Training Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(train_descriptors[:, 0], train_descriptors[:, 1], c=train_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"train_clusters.png\", dpi=600)\n plt.close()\n\n # Testing Set Clusters\n plt.title(\"Testing Set Clusters\")\n plt.xlabel(xlabel)\n plt.ylabel(ylabel)\n plt.xlim(xmin, xmax)\n plt.ylim(ymin, ymax)\n plt.scatter(test_descriptors[:, 0], test_descriptors[:, 1], c=test_colors)\n clusters_legend(colors)\n plt.tight_layout()\n plt.savefig(\"test_clusters.png\", dpi=600)\n plt.close()\n\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n 
pass\n","name":"post_processing_pca_2d_clusters_matplotlib.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"# ----------------------------------------------------------------- #\n# #\n# ROC Curve Generator #\n# #\n# Computes and displays the Receiver Operating Characteristic #\n# (ROC) curve. This is restricted to binary classification tasks. #\n# #\n# ----------------------------------------------------------------- #\n\n\nimport matplotlib.pyplot as plt\nimport matplotlib.collections\nimport sklearn.metrics\nimport numpy as np\nimport settings\n\nwith settings.context as context:\n # Train\n if settings.is_workflow_running_to_train:\n # Restore the data\n test_target = context.load(\"test_target\").flatten()\n # Slice the first column because Sklearn's ROC curve prefers probabilities for the positive class\n test_probabilities = context.load(\"test_probabilities\")[:, 1]\n\n # Exit if there's more than one label in the predictions\n if len(set(test_target)) > 2:\n exit()\n\n # ROC curve function in sklearn prefers the positive class\n false_positive_rate, true_positive_rate, thresholds = sklearn.metrics.roc_curve(test_target, test_probabilities,\n pos_label=1)\n thresholds[0] -= 1 # Sklearn arbitrarily adds 1 to the first threshold\n roc_auc = np.round(sklearn.metrics.auc(false_positive_rate, true_positive_rate), 3)\n\n # Plot the curve\n fig, ax = plt.subplots()\n points = np.array([false_positive_rate, true_positive_rate]).T.reshape(-1, 1, 2)\n segments = np.concatenate([points[:-1], points[1:]], axis=1)\n norm = plt.Normalize(thresholds.min(), thresholds.max())\n lc = matplotlib.collections.LineCollection(segments, cmap='jet', norm=norm, linewidths=2)\n lc.set_array(thresholds)\n line = ax.add_collection(lc)\n fig.colorbar(line, ax=ax).set_label('Threshold')\n\n # Padding to ensure we see the line\n ax.margins(0.01)\n\n plt.title(f\"ROC curve, AUC={roc_auc}\")\n plt.xlabel(\"False Positive Rate\")\n plt.ylabel(\"True Positive Rate\")\n plt.tight_layout()\n plt.savefig(\"my_roc_curve.png\", dpi=600)\n\n # Predict\n else:\n # It might not make as much sense to draw a plot when predicting...\n pass\n","name":"post_processing_roc_curve_sklearn.py","contextProviders":[],"applicationName":"python","executableName":"python"},{"content":"#!/bin/bash\n# ---------------------------------------------------------------- #\n# #\n# Example shell script for Exabyte.io platform. #\n# #\n# Will be used as follows: #\n# #\n# 1. shebang line is read from the first line above #\n# 2. based on shebang one of the shell types is selected: #\n# - /bin/bash #\n# - /bin/csh #\n# - /bin/tclsh #\n# - /bin/tcsh #\n# - /bin/zsh #\n# 3. runtime directory for this calculation is created #\n# 4. the content of the script is executed #\n# #\n# Adjust the content below to include your code. #\n# #\n# ---------------------------------------------------------------- #\n\necho \"Hello world!\"\n","name":"hello_world.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ---------------------------------------------------------------- #\n# #\n# Example job submission script for Exabyte.io platform #\n# #\n# Shows resource manager directives for: #\n# #\n# 1. the name of the job (-N) #\n# 2. the number of nodes to be used (-l nodes=) #\n# 3. the number of processors per node (-l ppn=) #\n# 4. the walltime in dd:hh:mm:ss format (-l walltime=) #\n# 5. queue (-q) D, OR, OF, SR, SF #\n# 6. 
merging standard output and error (-j oe) #\n# 7. email about job abort, begin, end (-m abe) #\n# 8. email address to use (-M) #\n# #\n# For more information visit https://docs.exabyte.io/cli/jobs #\n# ---------------------------------------------------------------- #\n\n#PBS -N ESPRESSO-TEST\n#PBS -j oe\n#PBS -l nodes=1\n#PBS -l ppn=1\n#PBS -l walltime=00:00:10:00\n#PBS -q D\n#PBS -m abe\n#PBS -M info@exabyte.io\n\n# load module\nmodule add espresso/540-i-174-impi-044\n\n# go to the job working directory\ncd $PBS_O_WORKDIR\n\n# create input file\ncat > pw.in < pw.out\n","name":"job_espresso_pw_scf.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\nmkdir -p {{ JOB_SCRATCH_DIR }}/outdir/_ph0\ncd {{ JOB_SCRATCH_DIR }}/outdir\ncp -r {{ JOB_WORK_DIR }}/../outdir/__prefix__.* .\n{%- endraw -%}\n","name":"espresso_link_outdir_save.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"{%- raw -%}\n#!/bin/bash\n\ncp {{ JOB_SCRATCH_DIR }}/outdir/_ph0/__prefix__.phsave/dynmat* {{ JOB_WORK_DIR }}/../outdir/_ph0/__prefix__.phsave\n{%- endraw -%}\n","name":"espresso_collect_dynmat.sh","contextProviders":[],"applicationName":"shell","executableName":"sh"},{"content":"#!/bin/bash\n\n# ------------------------------------------------------------------ #\n# This script prepares necessary directories to run VASP NEB\n# calculation. It puts initial POSCAR into directory 00, final into 0N\n# and intermediate images in 01 to 0(N-1). It is assumed that SCF\n# calculations for initial and final structures are already done in\n# previous subworkflows and their standard outputs are written into\n# \"vasp_neb_initial.out\" and \"vasp_neb_final.out\" files respectively.\n# These outputs are here copied into initial (00) and final (0N)\n# directories to calculate the reaction energy profile.\n# ------------------------------------------------------------------ #\n\n{% raw -%}\ncd {{ JOB_WORK_DIR }}\n{%- endraw %}\n\n# Prepare First Directory\nmkdir -p 00\ncat > 00/POSCAR < 0{{ input.INTERMEDIATE_IMAGES.length + 1 }}/POSCAR < 0{{ loop.index }}/POSCAR <