Skip to content

Commit

Permalink
Merge pull request #19 from MolecularAI/misc_fixes
Browse files Browse the repository at this point in the history
Scaled descriptor fix for precomputed descriptor inference
  • Loading branch information
lewismervin1 authored Jul 9, 2024
2 parents b52363f + 93d9f5d commit 6deb700
Show file tree
Hide file tree
Showing 11 changed files with 241 additions and 68 deletions.
Binary file modified docs/sphinx-builddir/doctrees/environment.pickle
Binary file not shown.
Binary file not shown.
Binary file modified docs/sphinx-builddir/doctrees/optunaz.doctree
Binary file not shown.
66 changes: 35 additions & 31 deletions docs/sphinx-builddir/html/_modules/optunaz/predict.html
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,23 @@ <h1>Source code for optunaz.predict</h1><div class="highlight"><pre>
<span class="k">raise</span> <span class="n">UncertaintyError</span><span class="p">(</span><span class="s2">&quot;Uncertainty not availble for this model&quot;</span><span class="p">)</span></div>


<div class="viewcode-block" id="set_inference_params"><a class="viewcode-back" href="../../optunaz.html#optunaz.predict.set_inference_params">[docs]</a><span class="k">def</span> <span class="nf">set_inference_params</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="n">desc</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">desc</span><span class="o">.</span><span class="n">parameters</span><span class="p">,</span> <span class="s2">&quot;descriptor&quot;</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">hasattr</span><span class="p">(</span>
<span class="n">desc</span><span class="o">.</span><span class="n">parameters</span><span class="o">.</span><span class="n">descriptor</span><span class="p">,</span> <span class="s2">&quot;inference_parameters&quot;</span>
<span class="p">):</span> <span class="c1"># Scaled precomputed descriptors handled here</span>
<span class="n">desc</span> <span class="o">=</span> <span class="n">desc</span><span class="o">.</span><span class="n">parameters</span><span class="o">.</span><span class="n">descriptor</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">desc</span><span class="p">,</span> <span class="s2">&quot;inference_parameters&quot;</span><span class="p">):</span>
<span class="n">check_precomp_args</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
<span class="n">desc</span><span class="o">.</span><span class="n">inference_parameters</span><span class="p">(</span>
<span class="n">args</span><span class="o">.</span><span class="n">input_precomputed_file</span><span class="p">,</span>
<span class="n">args</span><span class="o">.</span><span class="n">input_precomputed_input_column</span><span class="p">,</span>
<span class="n">args</span><span class="o">.</span><span class="n">input_precomputed_response_column</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Precomputed descriptor inference params set&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">return</span> <span class="kc">False</span></div>


<div class="viewcode-block" id="check_precomp_args"><a class="viewcode-back" href="../../optunaz.html#optunaz.predict.check_precomp_args">[docs]</a><span class="k">def</span> <span class="nf">check_precomp_args</span><span class="p">(</span><span class="n">args</span><span class="p">):</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">assert</span> <span class="p">(</span>
Expand All @@ -142,42 +159,29 @@ <h1>Source code for optunaz.predict</h1><div class="highlight"><pre>

<div class="viewcode-block" id="validate_set_precomputed"><a class="viewcode-back" href="../../optunaz.html#optunaz.predict.validate_set_precomputed">[docs]</a><span class="k">def</span> <span class="nf">validate_set_precomputed</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="n">model</span><span class="p">):</span>
<span class="n">descriptor_str</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">descriptor</span><span class="o">.</span><span class="n">name</span>
<span class="k">if</span> <span class="n">descriptor_str</span> <span class="o">==</span> <span class="s2">&quot;CompositeDescriptor&quot;</span><span class="p">:</span>
<span class="n">precomp_idx</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">idx</span>
<span class="k">for</span> <span class="n">idx</span><span class="p">,</span> <span class="n">d</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">descriptor</span><span class="o">.</span><span class="n">parameters</span><span class="o">.</span><span class="n">descriptors</span><span class="p">)</span>
<span class="k">if</span> <span class="n">d</span><span class="o">.</span><span class="n">name</span> <span class="o">==</span> <span class="s2">&quot;PrecomputedDescriptorFromFile&quot;</span>
<span class="p">]</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">precomp_idx</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">if</span> <span class="n">set_inference_params</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="n">model</span><span class="o">.</span><span class="n">descriptor</span><span class="p">):</span>
<span class="k">return</span> <span class="n">model</span>
<span class="k">elif</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">descriptor</span><span class="o">.</span><span class="n">parameters</span><span class="p">,</span> <span class="s2">&quot;descriptors&quot;</span><span class="p">):</span>
<span class="n">n_precomp</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">for</span> <span class="n">d</span> <span class="ow">in</span> <span class="n">model</span><span class="o">.</span><span class="n">descriptor</span><span class="o">.</span><span class="n">parameters</span><span class="o">.</span><span class="n">descriptors</span><span class="p">:</span>
<span class="n">n_precomp</span> <span class="o">+=</span> <span class="n">set_inference_params</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="n">d</span><span class="p">)</span>
<span class="k">if</span> <span class="n">n_precomp</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">descriptor_str</span><span class="si">}</span><span class="s2"> has no Precomputed descriptors... ignoring precomputed descriptor parameters&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">model</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">precomp_idx</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">PrecomputedError</span><span class="p">(</span>
<span class="s2">&quot;Inference for &gt; precomputed descriptor not currently available&quot;</span>
<span class="p">)</span>
<span class="n">check_precomp_args</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
<span class="n">precomp_desc</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">descriptor</span><span class="o">.</span><span class="n">parameters</span><span class="o">.</span><span class="n">descriptors</span><span class="p">[</span><span class="n">precomp_idx</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span>
<span class="n">precomp_desc</span><span class="o">.</span><span class="n">inference_parameters</span><span class="p">(</span>
<span class="n">args</span><span class="o">.</span><span class="n">input_precomputed_file</span><span class="p">,</span>
<span class="n">args</span><span class="o">.</span><span class="n">input_precomputed_input_column</span><span class="p">,</span>
<span class="n">args</span><span class="o">.</span><span class="n">input_precomputed_response_column</span><span class="p">,</span>
<span class="k">elif</span> <span class="n">n_precomp</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">PrecomputedError</span><span class="p">(</span>
<span class="s2">&quot;Inference for &gt; precomputed descriptor not currently available&quot;</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">descriptor_str</span> <span class="o">!=</span> <span class="s2">&quot;PrecomputedDescriptorFromFile&quot;</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Model was trained using </span><span class="si">{</span><span class="n">descriptor_str</span><span class="si">}</span><span class="s2">... ignoring precomputed descriptor parameters&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">model</span>
<span class="k">else</span><span class="p">:</span> <span class="c1"># must be precomputed</span>
<span class="n">check_precomp_args</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
<span class="n">precomp_desc</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">descriptor</span>
<span class="n">precomp_desc</span><span class="o">.</span><span class="n">inference_parameters</span><span class="p">(</span>
<span class="n">args</span><span class="o">.</span><span class="n">input_precomputed_file</span><span class="p">,</span>
<span class="n">args</span><span class="o">.</span><span class="n">input_precomputed_input_column</span><span class="p">,</span>
<span class="n">args</span><span class="o">.</span><span class="n">input_precomputed_response_column</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">check_precomp_args</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Model was trained using </span><span class="si">{</span><span class="n">descriptor_str</span><span class="si">}</span><span class="s2">... ignoring precomputed descriptor parameters&quot;</span>
<span class="p">)</span>
<span class="k">except</span> <span class="n">PrecomputedError</span><span class="p">:</span>
<span class="k">pass</span>
<span class="k">return</span> <span class="n">model</span></div>


Expand Down
2 changes: 2 additions & 0 deletions docs/sphinx-builddir/html/genindex.html
Original file line number Diff line number Diff line change
Expand Up @@ -3129,6 +3129,8 @@ <h2 id="S">S</h2>
<li><a href="optunaz.config.html#optunaz.config.build_from_opt.set_build_cache">set_build_cache() (in module optunaz.config.build_from_opt)</a>
</li>
<li><a href="optunaz.config.html#optunaz.config.optconfig.OptimizationConfig.set_cache">set_cache() (optunaz.config.optconfig.OptimizationConfig method)</a>
</li>
<li><a href="optunaz.html#optunaz.predict.set_inference_params">set_inference_params() (in module optunaz.predict)</a>
</li>
<li><a href="optunaz.html#optunaz.descriptors.ScaledDescriptor.set_unfitted_scaler_data">set_unfitted_scaler_data() (optunaz.descriptors.ScaledDescriptor method)</a>
</li>
Expand Down
Binary file modified docs/sphinx-builddir/html/objects.inv
Binary file not shown.
5 changes: 5 additions & 0 deletions docs/sphinx-builddir/html/optunaz.html
Original file line number Diff line number Diff line change
Expand Up @@ -1838,6 +1838,11 @@ <h2>Submodules<a class="headerlink" href="#submodules" title="Permalink to this
<span class="sig-prename descclassname"><span class="pre">optunaz.predict.</span></span><span class="sig-name descname"><span class="pre">validate_uncertainty</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">model</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/optunaz/predict.html#validate_uncertainty"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#optunaz.predict.validate_uncertainty" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="optunaz.predict.set_inference_params">
<span class="sig-prename descclassname"><span class="pre">optunaz.predict.</span></span><span class="sig-name descname"><span class="pre">set_inference_params</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">desc</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/optunaz/predict.html#set_inference_params"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#optunaz.predict.set_inference_params" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="optunaz.predict.check_precomp_args">
<span class="sig-prename descclassname"><span class="pre">optunaz.predict.</span></span><span class="sig-name descname"><span class="pre">check_precomp_args</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">args</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/optunaz/predict.html#check_precomp_args"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#optunaz.predict.check_precomp_args" title="Permalink to this definition"></a></dt>
Expand Down
2 changes: 1 addition & 1 deletion docs/sphinx-builddir/html/searchindex.js

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"task": "optimization",
"data": {
"input_column": "canonical",
"response_column": "molwt",
"training_dataset_file": "tests/data/precomputed_descriptor/train_with_fp.csv"
},
"descriptors": [
{
"name": "ScaledDescriptor",
"parameters": {
"scaler": {
"name": "UnfittedSklearnScaler"
},
"descriptor": {
"name": "PrecomputedDescriptorFromFile",
"parameters": {
"file": "tests/data/precomputed_descriptor/train_with_fp.csv",
"input_column": "canonical",
"response_column": "fp"
}
}
}
}
],
"settings": {
"mode": "regression",
"cross_validation": 3,
"direction": "maximize",
"n_trials": 15,
"n_startup_trials": 10
},
"visualization": null,
"algorithms": [
{
"name": "SVR",
"parameters": {
"C": {
"low": 1E-10,
"high": 100.0
},
"gamma": {
"low": 0.0001,
"high": 100.0
}
}
},
{
"name": "RandomForestRegressor",
"parameters": {
"max_depth": {
"low": 2,
"high": 32
},
"n_estimators": {
"low": 10,
"high": 250
},
"max_features": [
"auto"
]
}
},
{
"name": "Ridge",
"parameters": {
"alpha": {
"low": 0,
"high": 2
}
}
},
{
"name": "Lasso",
"parameters": {
"alpha": {
"low": 0,
"high": 2
}
}
},
{
"name": "PLSRegression",
"parameters": {
"n_components": {
"low": 2,
"high": 3
}
}
},
{
"name": "XGBRegressor",
"parameters": {
"max_depth": {
"low": 2,
"high": 32
},
"n_estimators": {
"low": 3,
"high": 100
},
"learning_rate": {
"low": 0.1,
"high": 0.1
}
}
}
]
}
66 changes: 35 additions & 31 deletions optunaz/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,23 @@ def validate_uncertainty(args, model):
raise UncertaintyError("Uncertainty not availble for this model")


def set_inference_params(args, desc):
    """Configure a precomputed descriptor with the inference-time inputs.

    If ``desc`` wraps another descriptor (``desc.parameters.descriptor``)
    that exposes ``inference_parameters`` -- e.g. a scaled precomputed
    descriptor -- the wrapped descriptor is configured instead of the
    wrapper itself.

    Args:
        args: Parsed arguments providing ``input_precomputed_file``,
            ``input_precomputed_input_column`` and
            ``input_precomputed_response_column``.
        desc: Descriptor object to inspect.

    Returns:
        True if ``inference_parameters`` was called on ``desc`` (or on the
        descriptor it wraps); False if neither exposes that hook.

    Raises:
        Propagates whatever ``check_precomp_args`` raises for invalid
        precomputed arguments (presumably ``PrecomputedError`` -- confirm
        against its definition).
    """
    # Use getattr defaults so a descriptor lacking ``parameters`` (or a
    # nested ``descriptor``) yields False instead of an AttributeError.
    wrapped = getattr(getattr(desc, "parameters", None), "descriptor", None)
    if hasattr(wrapped, "inference_parameters"):
        # Scaled precomputed descriptors handled here
        desc = wrapped

    if not hasattr(desc, "inference_parameters"):
        return False

    # Validate the CLI arguments before handing them to the descriptor.
    check_precomp_args(args)
    desc.inference_parameters(
        args.input_precomputed_file,
        args.input_precomputed_input_column,
        args.input_precomputed_response_column,
    )
    logging.info("Precomputed descriptor inference params set")
    return True


def check_precomp_args(args):
try:
assert (
Expand All @@ -62,42 +79,29 @@ def check_precomp_args(args):

def validate_set_precomputed(args, model):
descriptor_str = model.descriptor.name
if descriptor_str == "CompositeDescriptor":
precomp_idx = [
idx
for idx, d in enumerate(model.descriptor.parameters.descriptors)
if d.name == "PrecomputedDescriptorFromFile"
]
if len(precomp_idx) == 0:
if set_inference_params(args, model.descriptor):
return model
elif hasattr(model.descriptor.parameters, "descriptors"):
n_precomp = 0
for d in model.descriptor.parameters.descriptors:
n_precomp += set_inference_params(args, d)
if n_precomp == 0:
logging.warning(
f"{descriptor_str} has no Precomputed descriptors... ignoring precomputed descriptor parameters"
)
return model
else:
if len(precomp_idx) > 1:
raise PrecomputedError(
"Inference for > precomputed descriptor not currently available"
)
check_precomp_args(args)
precomp_desc = model.descriptor.parameters.descriptors[precomp_idx[0]]
precomp_desc.inference_parameters(
args.input_precomputed_file,
args.input_precomputed_input_column,
args.input_precomputed_response_column,
elif n_precomp > 1:
raise PrecomputedError(
"Inference for > precomputed descriptor not currently available"
)
elif descriptor_str != "PrecomputedDescriptorFromFile":
logging.warning(
f"Model was trained using {descriptor_str}... ignoring precomputed descriptor parameters"
)
return model
else: # must be precomputed
check_precomp_args(args)
precomp_desc = model.descriptor
precomp_desc.inference_parameters(
args.input_precomputed_file,
args.input_precomputed_input_column,
args.input_precomputed_response_column,
)
else:
try:
check_precomp_args(args)
logging.warning(
f"Model was trained using {descriptor_str}... ignoring precomputed descriptor parameters"
)
except PrecomputedError:
pass
return model


Expand Down
Loading

0 comments on commit 6deb700

Please sign in to comment.