Benchmark reports (#462)
segsell authored Jun 12, 2023
1 parent d25d4c2 commit fcd95f1
Showing 20 changed files with 1,037 additions and 88 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -6,7 +6,7 @@ repos:
- id: check-useless-excludes
# - id: identity # Prints all files passed to pre-commits. Debugging.
- repo: https://github.com/lyz-code/yamlfix
- rev: 1.9.0
+ rev: 1.10.0
hooks:
- id: yamlfix
exclude: tests/optimization/fixtures
@@ -52,7 +52,7 @@ repos:
- id: check-docstring-first
exclude: src/estimagic/optimization/algo_options.py
- repo: https://github.com/adrienverge/yamllint.git
- rev: v1.31.0
+ rev: v1.32.0
hooks:
- id: yamllint
exclude: tests/optimization/fixtures
@@ -67,7 +67,7 @@
- id: blacken-docs
exclude: docs/source/how_to_guides/optimization/how_to_specify_constraints.md
- repo: https://github.com/PyCQA/docformatter
- rev: v1.6.4
+ rev: v1.7.1
hooks:
- id: docformatter
args:
@@ -79,7 +79,7 @@
- --blank
exclude: src/estimagic/optimization/algo_options.py
- repo: https://github.com/charliermarsh/ruff-pre-commit
- rev: v0.0.263
+ rev: v0.0.270
hooks:
- id: ruff
- repo: https://github.com/nbQA-dev/nbQA
@@ -110,7 +110,7 @@ repos:
- '88'
files: (docs/.)
- repo: https://github.com/asottile/setup-cfg-fmt
- rev: v2.2.0
+ rev: v2.3.0
hooks:
- id: setup-cfg-fmt
- repo: https://github.com/mgedmin/check-manifest

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion setup.cfg
@@ -7,7 +7,7 @@ url = https://github.com/OpenSourceEconomics/estimagic
author = Janos Gabler
author_email = [email protected]
license = MIT
- license_file = LICENSE
+ license_files = LICENSE
classifiers =
Development Status :: 4 - Beta
Intended Audience :: Science/Research
6 changes: 6 additions & 0 deletions src/estimagic/__init__.py
@@ -1,6 +1,9 @@
from estimagic import utilities
from estimagic.benchmarking.get_benchmark_problems import get_benchmark_problems
from estimagic.benchmarking.run_benchmark import run_benchmark
+ from estimagic.benchmarking.benchmark_reports import convergence_report
+ from estimagic.benchmarking.benchmark_reports import rank_report
+ from estimagic.benchmarking.benchmark_reports import traceback_report
from estimagic.differentiation.derivatives import first_derivative, second_derivative
from estimagic.estimation.estimate_ml import LikelihoodResult, estimate_ml
from estimagic.estimation.estimate_msm import MomentsResult, estimate_msm
@@ -45,6 +48,9 @@
"get_benchmark_problems",
"profile_plot",
"convergence_plot",
"convergence_report",
"rank_report",
"traceback_report",
"lollipop_plot",
"derivative_plot",
"slice_plot",
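The three functions added above are also exported via `__all__`, so they become reachable from the package root. A minimal import sketch (illustrative, not part of the diff):

# With this commit, the report functions can be imported directly from the
# top-level estimagic namespace.
from estimagic import convergence_report, rank_report, traceback_report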
239 changes: 239 additions & 0 deletions src/estimagic/benchmarking/benchmark_reports.py
@@ -0,0 +1,239 @@
import pandas as pd
from estimagic.benchmarking.process_benchmark_results import (
process_benchmark_results,
)

from estimagic.visualization.profile_plot import create_solution_times


def convergence_report(
problems, results, *, stopping_criterion="y", x_precision=1e-4, y_precision=1e-4
):
"""Create a DataFrame with convergence information for a set of problems.
Args:
problems (dict): estimagic benchmarking problems dictionary. Keys are the
problem names. Values contain information on the problem, including the
solution value.
results (dict): estimagic benchmarking results dictionary. Keys are
tuples of the form (problem, algorithm), values are dictionaries of the
collected information on the benchmark run, including 'criterion_history'
and 'time_history'.
stopping_criterion (str): one of "x_and_y", "x_or_y", "x", "y". Determines
how convergence is determined from the two precisions. Default is "y".
x_precision (float or None): how close an algorithm must have gotten to the
true parameter values (as percent of the Euclidean distance between start
and solution parameters) before the criterion for clipping and convergence
is fulfilled. Default is 1e-4.
y_precision (float or None): how close an algorithm must have gotten to the
true criterion values (as percent of the distance between start
and solution criterion value) before the criterion for clipping and
convergence is fulfilled. Default is 1e-4.
Returns:
pandas.DataFrame: indexes are the problems, columns are the algorithms and
the dimensionality of the benchmark problems. For the algorithms column,
the values are strings that are either "success", "failed", or "error".
For the dimensionality column, the values denote the number of dimensions
of the problem.
"""
_, converged_info = process_benchmark_results(
problems=problems,
results=results,
stopping_criterion=stopping_criterion,
x_precision=x_precision,
y_precision=y_precision,
)

report = _get_success_info(results, converged_info)
report["dimensionality"] = report.index.map(_get_problem_dimensions(problems))

return report


def rank_report(
problems,
results,
*,
runtime_measure="n_evaluations",
stopping_criterion="y",
x_precision=1e-4,
y_precision=1e-4,
):
"""Create a DataFrame with rank information for a set of problems.
Args:
problems (dict): estimagic benchmarking problems dictionary. Keys are the
problem names. Values contain information on the problem, including the
solution value.
results (dict): estimagic benchmarking results dictionary. Keys are
tuples of the form (problem, algorithm), values are dictionaries of the
collected information on the benchmark run, including 'criterion_history'
and 'time_history'.
runtime_measure (str): "n_evaluations", "n_batches" or "walltime".
This is the runtime until the desired convergence was reached by an
algorithm. This is called performance measure by Moré and Wild (2009).
Default is "n_evaluations".
stopping_criterion (str): one of "x_and_y", "x_or_y", "x", "y". Determines
how convergence is determined from the two precisions.
x_precision (float or None): how close an algorithm must have gotten to the
true parameter values (as percent of the Euclidean distance between start
and solution parameters) before the criterion for clipping and convergence
is fulfilled. Default is 1e-4.
y_precision (float or None): how close an algorithm must have gotten to the
true criterion values (as percent of the distance between start
and solution criterion value) before the criterion for clipping and
convergence is fulfilled. Default is 1e-4.
Returns:
pandas.DataFrame: indexes are the problems, columns are the algorithms and the
dimensionality of the problems. The values are the ranks of the algorithms
for each problem, where 0 means the algorithm was the fastest, 1 means it
was the second fastest and so on. If an algorithm did not converge on a
problem, the value is "failed". If an algorithm did encounter an error
during optimization, the value is "error".
"""
histories, converged_info = process_benchmark_results(
problems=problems,
results=results,
stopping_criterion=stopping_criterion,
x_precision=x_precision,
y_precision=y_precision,
)

solution_times = create_solution_times(
histories, runtime_measure, converged_info, return_tidy=False
)
solution_times["rank"] = (
solution_times.groupby("problem")[runtime_measure].rank(
method="dense", ascending=True
)
- 1
).astype("Int64")

success_info = _get_success_info(results, converged_info)

df_wide = solution_times.pivot(index="problem", columns="algorithm", values="rank")
report = df_wide.astype(str)
report.columns.name = None

report[~converged_info] = success_info
report["dimensionality"] = report.index.map(_get_problem_dimensions(problems))

return report


def traceback_report(problems, results, return_type="dataframe"):
"""Create traceback report for all problems that have not been solved.
Args:
results (dict): estimagic benchmarking results dictionary. Keys are
tuples of the form (problem, algorithm), values are dictionaries of the
collected information on the benchmark run, including 'criterion_history'
and 'time_history'.
return_type (str): either "text", "markdown", "dict" or "dataframe".
If "text", the traceback report is returned as a string. If "markdown",
it is a markdown string. If "dict", it is returned as a dictionary.
If "dataframe", it is a tidy pandas DataFrame, where indexes are the
algorithm and problem names, the columns are the tracebacks and the
dimensionality of the problem. Default is "dataframe".
Returns:
(list or str or dict or pandas.DataFrame): traceback report. If return_type
is "text", the report is a list of strings. If "markdown", it is a
formatted markdown string with algorithms and problem names as headers.
If return_type is "dict", the report is a dictionary. If return_type is
"dataframe", it is a tidy pandas DataFrame. In the latter case, indexes
are the algorithm and problem names, the columns are the tracebacks and
the dimensionality of the problems. The values are the tracebacks of the
algorithms for problems where they stopped with an error.
"""

if return_type == "text":
report = []
for result in results.values():
if isinstance(result["solution"], str):
report.append(result["solution"])

elif return_type == "markdown":
report = "```python"
for (problem_name, algorithm_name), result in results.items():
if isinstance(result["solution"], str):
if f"### {algorithm_name}" not in report:
report += f"\n### {algorithm_name} \n"
report += f"\n#### {problem_name} \n"
report += f"\n{result['solution']} \n"
report += "\n```"

elif return_type == "dict":
report = {}
for (problem_name, algorithm_name), result in results.items():
if isinstance(result["solution"], str):
report[(problem_name, algorithm_name)] = result["solution"]

elif return_type == "dataframe":
tracebacks = {}
for (problem_name, algorithm_name), result in results.items():
if isinstance(result["solution"], str):
tracebacks[algorithm_name] = tracebacks.setdefault(algorithm_name, {})
tracebacks[algorithm_name][problem_name] = result["solution"]

report = pd.DataFrame.from_dict(tracebacks, orient="index").stack().to_frame()
report.index.set_names(["algorithm", "problem"], inplace=True)
report.columns = ["traceback"]
report["dimensionality"] = 0

for problem_name, dim in _get_problem_dimensions(problems).items():
if problem_name in report.index.get_level_values("problem"):
report.loc[(slice(None), problem_name), "dimensionality"] = dim

else:
raise ValueError(
f"return_type {return_type} is not supported. Must be one of "
f"'text', 'markdown', 'dict' or 'dataframe'."
)

return report


def _get_success_info(results, converged_info):
"""Create a DataFrame with information on whether an algorithm succeeded or not.
Args:
results (dict): estimagic benchmarking results dictionary. Keys are
tuples of the form (problem, algorithm), values are dictionaries of the
collected information on the benchmark run, including 'criterion_history'
and 'time_history'.
converged_info (pandas.DataFrame): columns are the algorithms, indexes are the
problems. The values are boolean and True when the algorithm arrived at
the solution with the desired precision.
Returns:
pandas.DataFrame: indexes are the problems, columns are the algorithms.
values are strings that are either "success", "failed", or "error".
"""
success_info = converged_info.replace({True: "success", False: "failed"})

for key, value in results.items():
if isinstance(value["solution"], str):
success_info.at[key] = "error"

return success_info


def _get_problem_dimensions(problems):
"""Get the dimension of each problem.
Args:
problems (dict): dictionary of problems. keys are problem names, values are
dictionaries with the problem information.
Returns:
dict: keys are problem names, values are the dimension of the problem.
"""
return {prob: len(problems[prob]["inputs"]["params"]) for prob in problems}
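Taken together, the new module exposes three public report generators built on top of `process_benchmark_results`. Below is a minimal usage sketch (illustrative, not part of the diff): the benchmark set name "example" and the two algorithm names are assumed to be available in the local installation; any problems/results pair produced by `run_benchmark` should work the same way.

# Sketch of how the new report functions could be used after a benchmark run.
# Assumptions: the "example" benchmark set and the scipy optimizers below are
# installed and available; names are illustrative only.
import estimagic as em

problems = em.get_benchmark_problems("example")
results = em.run_benchmark(
    problems,
    optimize_options=["scipy_lbfgsb", "scipy_neldermead"],
)

# "success" / "failed" / "error" per problem and algorithm, plus a
# dimensionality column with the number of parameters of each problem.
convergence = em.convergence_report(problems, results)

# Dense rank per problem (0 = fastest) based on the number of criterion
# evaluations needed to reach the default y_precision of 1e-4.
ranks = em.rank_report(problems, results, runtime_measure="n_evaluations")

# Tracebacks of runs that stopped with an error, formatted as a markdown
# string with algorithm and problem headers.
tracebacks = em.traceback_report(problems, results, return_type="markdown")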
1 change: 1 addition & 0 deletions src/estimagic/benchmarking/process_benchmark_results.py
@@ -65,6 +65,7 @@ def process_benchmark_results(
}
infos.append(info)

+ # breakpoint()
histories = pd.concat(histories, ignore_index=True)
infos = pd.DataFrame(infos).set_index(["problem", "algorithm"]).unstack()
infos.columns = [tup[1] for tup in infos.columns]
3 changes: 2 additions & 1 deletion src/estimagic/optimization/bhhh.py
@@ -19,7 +19,8 @@ def bhhh(
):
"""Minimize a likelihood function using the BHHH algorithm.
- For details, see :ref:`_own_algorithms`.
+ For details, see
+ :ref: `_own_algorithms`.
"""
result_dict = bhhh_internal(
3 changes: 2 additions & 1 deletion src/estimagic/optimization/cyipopt_optimizers.py
@@ -217,7 +217,8 @@ def ipopt(
):
"""Minimize a scalar function using the Interior Point Optimizer.
- For details see :ref:`ipopt_algorithm`.
+ For details see
+ :ref: `ipopt_algorithm`.
"""
if not IS_CYIPOPT_INSTALLED:
3 changes: 2 additions & 1 deletion src/estimagic/optimization/fides_optimizers.py
@@ -50,7 +50,8 @@ def fides(
):
"""Minimize a scalar function using the Fides Optimizer.
- For details see :ref:`fides_algorithm`.
+ For details see
+ :ref: `fides_algorithm`.
"""
if not IS_FIDES_INSTALLED:
6 changes: 4 additions & 2 deletions src/estimagic/optimization/nag_optimizers.py
@@ -88,7 +88,8 @@ def nag_dfols(
):
r"""Minimize a function with least squares structure using DFO-LS.
- For details see :ref:`list_of_nag_algorithms`.
+ For details see
+ :ref: `list_of_nag_algorithms`.
"""
if not IS_DFOLS_INSTALLED:
@@ -281,7 +282,8 @@ def nag_pybobyqa(
):
r"""Minimize a function using the BOBYQA algorithm.
- For details see :ref:`list_of_nag_algorithms`.
+ For details see
+ :ref: `list_of_nag_algorithms`.
"""
if not IS_PYBOBYQA_INSTALLED:
