From e47747a39cb75020150eff2109880e483ef217b2 Mon Sep 17 00:00:00 2001
From: Alan Manning
Date: Mon, 19 Feb 2024 05:34:37 +0000
Subject: [PATCH] fix for results not being cached at the correct point and
 example usage

---
 LICENSE                                    |   2 +-
 docs/conf.py                               |   2 +-
 docs/examples/index.rst                    |   9 +
 docs/examples/itteration_tools.rst         | 319 +++++++++++++++++++++
 docs/index.rst                             |   1 +
 src/sonnetsuiteshelper/itteration_tools.py | 101 +++++--
 6 files changed, 401 insertions(+), 33 deletions(-)
 create mode 100644 docs/examples/index.rst
 create mode 100644 docs/examples/itteration_tools.rst

diff --git a/LICENSE b/LICENSE
index a6b5546..a84cfd5 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 sonnetsuiteshelper - An example package. Generated with cookiecutter-pylibrary.
 
-Copyright (c) 2023, Alan Manning.
+Copyright (c) 2024, Alan Manning.
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License
diff --git a/docs/conf.py b/docs/conf.py
index b3c8247..400fcfd 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -14,7 +14,7 @@ source_suffix = ".rst"
 master_doc = "index"
 project = "sonnetsuiteshelper"
-year = "2023"
+year = "2024"
 author = "Alan Manning"
 copyright = f"{year}, {author}"
 version = release = "0.4.5"
diff --git a/docs/examples/index.rst b/docs/examples/index.rst
new file mode 100644
index 0000000..73fe8c6
--- /dev/null
+++ b/docs/examples/index.rst
@@ -0,0 +1,9 @@

Example Code Snippets
#####################


.. toctree::
   :maxdepth: 1

   itteration_tools
diff --git a/docs/examples/itteration_tools.rst b/docs/examples/itteration_tools.rst
new file mode 100644
index 0000000..3515a46
--- /dev/null
+++ b/docs/examples/itteration_tools.rst
@@ -0,0 +1,319 @@
****************
itteration_tools
****************


SimpleSingleParamOptimiser
==========================

Here is an example of how the SimpleSingleParamOptimiser class could be used,
together with the surrounding code needed to support it.

Setting up initial files
------------------------

First, this code makes an initial batch of Sonnet files that will be used as
the base of the optimiser and will be the files iterated upon to achieve the
desired result.

.. code-block:: python

    #file - example.py
    from sonnetsuiteshelper import file_generation

    print("Making the initial Batch 1")
    base_file_to_edit_name = "file_to_edit.son"
    base_file_to_edit_path = "base_file_for_generate"

    output_folder = "batch_1_made_files"

    lens_to_use = [1100.0, 1275.0, 1450.0, 1625.0, 1800.0, 1975.0]

    for file_no, length in enumerate(lens_to_use):
        params_to_edit_dict = {}
        params_to_edit_dict["length_var"] = length

        name_for_output_file = f"V1__file_({file_no})_len_{length}.son"

        file_generation.generate_file_like(
            base_filename=base_file_to_edit_name,
            base_file_path=base_file_to_edit_path,
            output_filename=name_for_output_file,
            output_file_path=output_folder,
            params_to_edit=params_to_edit_dict,
        )
    print("made all initial files")

The file structure after this has been run looks like: ::

    example_folder
    ├── batch_1_made_files
    │   ├── V1__file_(0)_len_1100.0.son
    │   ├── V1__file_(1)_len_1275.0.son
    │   ├── V1__file_(2)_len_1450.0.son
    │   ├── V1__file_(3)_len_1625.0.son
    │   ├── V1__file_(4)_len_1800.0.son
    │   └── V1__file_(5)_len_1975.0.son
    └── example.py

Now these files can be analysed in Sonnet.
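Before moving on, it can be worth confirming that every file in the batch was
written where expected. Below is a minimal sketch of such a check; it assumes
nothing beyond the standard library and the variables already defined in
example.py above.

.. code-block:: python

    #file - example.py
    import os

    # Warn about any batch 1 file that was not generated as expected.
    for file_no, length in enumerate(lens_to_use):
        son_file = os.path.join(output_folder, f"V1__file_({file_no})_len_{length}.son")
        if not os.path.isfile(son_file):
            print(f"Missing generated file: {son_file}")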
Setting up SimpleSingleParamOptimiser
-------------------------------------

For this next example, the output csv files from analysing those son files
have been placed in a folder called "batch_1_outputs". However, if they still
sit in the same directory as the son files, be sure to change the
batch_1_output_file_path variable to "batch_1_made_files" in the snippet
below.

The next code will set up an optimiser for each of those Sonnet files.

.. code-block:: python

    #file - example.py
    from sonnetsuiteshelper import itteration_tools

    print("Making optimiser objects")

    dict_of_optimisations = {}

    varaible_param_name = "length_var"
    desired_output_param = "QR"
    desired_output_param_value = 10000.0
    desired_output_param_value_tolerence_percent = 0.010
    correlation = "-"

    for file_no, length in enumerate(lens_to_use):
        batch_1_son_filename = f"V1__file_({file_no})_len_{length}.son"
        batch_1_son_file_path = "batch_1_made_files"

        batch_1_output_filename = f"V1__file_({file_no})_len_{length}.csv"
        batch_1_output_file_path = "batch_1_outputs"

        init_variable_param_value = length

        optimiser_name = f"{file_no}_{length}"

        optimiser_object = itteration_tools.SimpleSingleParamOptimiser(
            optimiser_name,
            varaible_param_name,
            batch_1_son_filename,
            batch_1_son_file_path,
            batch_1_output_filename,
            batch_1_output_file_path,
            init_variable_param_value,
            desired_output_param,
            desired_output_param_value,
            desired_output_param_value_tolerence_percent,
            correlation,
            sonnet_mesh_size=1.0,
            ignore_loading_cache=False,
        )

        dict_of_optimisations[optimiser_name] = optimiser_object

Note that these optimiser objects have been loaded into a dict so that
iterating through them all is easier, as shown in the next example.

Making an optimiser object will automatically analyse that first output file
and make the next batch of files.

The file structure after this has been run looks like: ::

    example_folder
    ├── batch_1_made_files
    │   ├── V1__file_(0)_len_1100.0.son
    │   ├── V1__file_(1)_len_1275.0.son
    │   ├── V1__file_(2)_len_1450.0.son
    │   ├── V1__file_(3)_len_1625.0.son
    │   ├── V1__file_(4)_len_1800.0.son
    │   └── V1__file_(5)_len_1975.0.son
    ├── batch_1_outputs
    │   ├── V1__file_(0)_len_1100.0.csv
    │   ├── V1__file_(1)_len_1275.0.csv
    │   ├── V1__file_(2)_len_1450.0.csv
    │   ├── V1__file_(3)_len_1625.0.csv
    │   ├── V1__file_(4)_len_1800.0.csv
    │   └── V1__file_(5)_len_1975.0.csv
    ├── batch_2_generated_files
    │   ├── batch_2__0_1100.0_length_var_1150.0.son
    │   ├── batch_2__1_1275.0_length_var_1325.0.son
    │   ├── batch_2__2_1450.0_length_var_1500.0.son
    │   ├── batch_2__3_1625.0_length_var_1675.0.son
    │   ├── batch_2__4_1800.0_length_var_1850.0.son
    │   └── batch_2__5_1975.0_length_var_2025.0.son
    ├── OptCache
    │   ├── SSPOC_0_1100.0.yml
    │   ├── SSPOC_1_1275.0.yml
    │   ├── SSPOC_2_1450.0.yml
    │   ├── SSPOC_3_1625.0.yml
    │   ├── SSPOC_4_1800.0.yml
    │   └── SSPOC_5_1975.0.yml
    └── example.py
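The current state of any of these optimisers can be inspected at this point by
simply printing it, since the class defines a __str__ method. A short example
using the dict built above:

.. code-block:: python

    #file - example.py
    # Print each optimiser's name, variable parameter values and target value.
    for optimiser_name, optimiser_obj in dict_of_optimisations.items():
        print(optimiser_obj)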
Iterating using SimpleSingleParamOptimiser
------------------------------------------

Once these next output files have been analysed with Sonnet and csv outputs
made, the next snippet will iterate through all the optimisers and generate
the next batch of files.

.. code-block:: python

    #file - example.py
    import os

    for optimiser_name, optimiser_obj in dict_of_optimisations.items():
        next_batch_output_exists = True

        while next_batch_output_exists:
            filename = optimiser_obj.get_last_analysis_filename()
            file_path = optimiser_obj.get_last_analysis_file_path()
            if not os.path.isfile(os.path.join(file_path, filename)):
                print("Sonnet output file does not exist")
                next_batch_output_exists = False
                break

            optimiser_obj.analyze_batch()
            optimiser_obj.generate_next_batch()

This code runs through each optimiser_obj in the dictionary containing all
the optimiser objects. For each optimiser_obj, if the analysis file it
expects to find exists, it analyses that batch and generates the next batch.
This continues until it cannot find an analysis file, at which point it
breaks out of the loop and moves on to the next optimiser_obj.

This structure takes full advantage of the caching in the optimiser object,
which writes its state to a cache file after each generate_next_batch() call.
This means that if this code is rerun it will only analyse the last batch,
not every batch leading up to and including it, eliminating the overhead of
regenerating lots of files that have already been made.

The file structure after this has been run looks like: ::

    example_folder
    ├── batch_1_made_files
    │   ├── V1__file_(0)_len_1100.0.son
    │   ├── V1__file_(1)_len_1275.0.son
    │   ├── V1__file_(2)_len_1450.0.son
    │   ├── V1__file_(3)_len_1625.0.son
    │   ├── V1__file_(4)_len_1800.0.son
    │   └── V1__file_(5)_len_1975.0.son
    ├── batch_1_outputs
    │   ├── V1__file_(0)_len_1100.0.csv
    │   ├── V1__file_(1)_len_1275.0.csv
    │   ├── V1__file_(2)_len_1450.0.csv
    │   ├── V1__file_(3)_len_1625.0.csv
    │   ├── V1__file_(4)_len_1800.0.csv
    │   └── V1__file_(5)_len_1975.0.csv
    ├── batch_2_generated_files
    │   ├── batch_2__0_1100.0_length_var_1150.0.son
    │   ├── batch_2__1_1275.0_length_var_1325.0.son
    │   ├── batch_2__2_1450.0_length_var_1500.0.son
    │   ├── batch_2__3_1625.0_length_var_1675.0.son
    │   ├── batch_2__4_1800.0_length_var_1850.0.son
    │   └── batch_2__5_1975.0_length_var_2025.0.son
    ├── batch_2_outputs
    │   ├── batch_2__0_1100.0_length_var_1150.0.csv
    │   ├── batch_2__1_1275.0_length_var_1325.0.csv
    │   ├── batch_2__2_1450.0_length_var_1500.0.csv
    │   ├── batch_2__3_1625.0_length_var_1675.0.csv
    │   ├── batch_2__4_1800.0_length_var_1850.0.csv
    │   └── batch_2__5_1975.0_length_var_2025.0.csv
    ├── batch_3_generated_files
    │   ├── batch_3__0_1100.0_length_var_1175.0.son
    │   ├── batch_3__1_1275.0_length_var_1350.0.son
    │   ├── batch_3__2_1450.0_length_var_1525.0.son
    │   ├── batch_3__3_1625.0_length_var_1700.0.son
    │   ├── batch_3__4_1800.0_length_var_1875.0.son
    │   └── batch_3__5_1975.0_length_var_2050.0.son
    ├── batch_3_outputs
    │   ├── batch_3__0_1100.0_length_var_1175.0.csv
    │   ├── batch_3__1_1275.0_length_var_1350.0.csv
    │   ├── batch_3__2_1450.0_length_var_1525.0.csv
    │   ├── batch_3__3_1625.0_length_var_1700.0.csv
    │   ├── batch_3__4_1800.0_length_var_1875.0.csv
    │   └── batch_3__5_1975.0_length_var_2050.0.csv
    ├── batch_4_generated_files
    │   └── ...
    ├── batch_4_outputs
    │   └── ...
    ├── batch_5_generated_files
    │   └── ...
    ├── batch_5_outputs
    │   └── ...
    ├── OptCache
    │   ├── SSPOC_0_1100.0.yml
    │   ├── SSPOC_1_1275.0.yml
    │   ├── SSPOC_2_1450.0.yml
    │   ├── SSPOC_3_1625.0.yml
    │   ├── SSPOC_4_1800.0.yml
    │   └── SSPOC_5_1975.0.yml
    └── example.py
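The OptCache yaml files are what make this resumability work: by default a
newly constructed optimiser loads its previous state from the matching cache
file rather than starting again from batch 1. If a completely fresh run of
one optimisation is wanted instead, the cache loading can be skipped, as
sketched here for a single optimiser (all other arguments as in the setup
snippet above):

.. code-block:: python

    #file - example.py
    # Passing ignore_loading_cache=True makes the optimiser ignore any
    # existing OptCache yaml file and restart from the batch 1 results.
    optimiser_object = itteration_tools.SimpleSingleParamOptimiser(
        optimiser_name,
        varaible_param_name,
        batch_1_son_filename,
        batch_1_son_file_path,
        batch_1_output_filename,
        batch_1_output_file_path,
        init_variable_param_value,
        desired_output_param,
        desired_output_param_value,
        desired_output_param_value_tolerence_percent,
        correlation,
        sonnet_mesh_size=1.0,
        ignore_loading_cache=True,
    )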
Once those files have again been analysed in Sonnet and csv outputs made,
that same code will generate every next batch until the optimisations have
finished.

Plotting SimpleSingleParamOptimiser output
------------------------------------------

This next snippet utilises the plotting functions built into the optimiser
object to show the current state of the optimiser.

.. code-block:: python

    #file - example.py
    for optimiser_name, optimiser_obj in dict_of_optimisations.items():
        optimiser_obj.plot_optimisation()

This next snippet also utilises the plotting functions but supplies a
matplotlib figure axes to create a custom plot showing all the current
optimiser states in one figure, as opposed to a separate figure for each.
This means any matplotlib plot setup can be used and customised to best suit
the application and make it clear what is happening.

.. code-block:: python

    #file - example.py
    import matplotlib.pyplot as plt

    title = "all_plotted"
    fig = plt.figure(title)
    plt.clf()
    rows = 2
    cols = 3
    grid = plt.GridSpec(rows, cols)

    ax_dict = {}
    for row in range(rows):
        for col in range(cols):
            ax_dict[f"{row}_{col}"] = plt.subplot(grid[row, col])

    file_no_to_ax = {
        0: "0_0",
        1: "0_1",
        2: "0_2",
        3: "1_0",
        4: "1_1",
        5: "1_2",
    }

    for file_no, (opt_name, opt_obj) in enumerate(dict_of_optimisations.items()):
        ax_name = file_no_to_ax[file_no]
        ax = ax_dict[ax_name]

        opt_obj.plot_optimisation(
            fig_ax=ax,
            plot_fit_function=True,
            plot_next_batch_variable_value=True,
            set_axis_labels=False,
        )

        ax.set_title(f"file_no - {file_no}")

    ax_dict["0_0"].set_ylabel("QR")
    ax_dict["1_0"].set_ylabel("QR")

    ax_dict["1_0"].set_xlabel("length_var")
    ax_dict["1_1"].set_xlabel("length_var")
    ax_dict["1_2"].set_xlabel("length_var")

    fig.suptitle("All Optimisations")
    fig.show()
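Finally, since each optimiser stores the measured output parameter values
alongside its target and tolerance, a simple convergence summary can be
printed. This is a sketch that assumes desired_output_param_values holds one
measured value per analysed batch and that the tolerance is stored as a
fraction of the target value:

.. code-block:: python

    #file - example.py
    for optimiser_name, optimiser_obj in dict_of_optimisations.items():
        # Compare the most recent measured value against the target.
        latest_value = optimiser_obj.desired_output_param_values[-1]
        target = optimiser_obj.desired_output_param_value
        tolerance = optimiser_obj.desired_output_param_value_tolerence_percent
        converged = abs(latest_value - target) <= abs(target) * tolerance
        print(f"{optimiser_name}: latest value = {latest_value}, converged = {converged}")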
diff --git a/docs/index.rst b/docs/index.rst
index ad842d5..5df1249 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -8,6 +8,7 @@ Contents
    readme
    installation
    usage
+   examples/index
    reference/index
    contributing
    authors
diff --git a/src/sonnetsuiteshelper/itteration_tools.py b/src/sonnetsuiteshelper/itteration_tools.py
index 37a4e61..6f6e4b9 100644
--- a/src/sonnetsuiteshelper/itteration_tools.py
+++ b/src/sonnetsuiteshelper/itteration_tools.py
@@ -1,4 +1,3 @@
-import copy
 import os
 
 import numpy as np
@@ -70,6 +69,10 @@ class SimpleSingleParamOptimiser:
     desired_output_param_values : list
         This is a list of the desired output parameter values from the
         optimiser.
 
+    loaded_cache : bool
+        This is a bool that shows whether a cache file has been used to load
+        values.
+
     sonnet_mesh_size : float
         Default=1.0. The mesh size in sonnet. This is the smallest change
         possible in the variable parameter that will result in a different
         file being produced.
@@ -100,6 +103,7 @@ def __init__(
         desired_output_param_value_tolerence_percent: float,
         correlation: str,
         sonnet_mesh_size: float = 1.0,
+        ignore_loading_cache: bool = False,
     ):
         """
         Parameters
         ----------
@@ -163,8 +167,11 @@
             possible in the variable parameter that will result in a different
             file being produced.
 
-        note these values are ints because the grid mesh size in sonnet is 1um
-        so cant analyze anything other than ints.
+        ignore_loading_cache : bool
+            Default=False. When True the optimiser will not try to load cached
+            results into memory. Otherwise, by default, the optimiser will try
+            to load values from a cache file in the OptCache path.
+
         """
         # Check correct format of arguments
@@ -185,17 +192,27 @@
         )
         # General setup
         self.name = unique_name
+
+        # Check for an existing cache file if ignore_loading_cache is False
+        if not ignore_loading_cache:
+            self.load_cached_results()
+        else:
+            self.loaded_cache = False
+
+        if self.loaded_cache:
+            return
+
         self.correlation = +1 if correlation == "+" else -1
 
         # Variable param setup
         self.variable_param_name = varaible_param_name
-        self.variable_param_values = []
+        self.variable_param_values: list[float] = []
 
         # Desired param setup
         self.desired_output_param = desired_output_param
         self.desired_output_param_value = desired_output_param_value
         self.desired_output_param_value_tolerence_percent = desired_output_param_value_tolerence_percent
-        self.desired_output_param_values = []
+        self.desired_output_param_values: list[float] = []
 
         # File settings
         self.sonnet_mesh_size = sonnet_mesh_size
@@ -219,9 +236,10 @@ def __str__(self) -> str:
         string += f"\n\tdesired_output_param_value: {self.desired_output_param_value}"
         return string
 
-    def __getstate__(self) -> str:
-        """Get the state of the object for pyyaml."""
-        return dict(
+    def _create_yaml_str(self) -> str:
+        """Create a yaml formatted string containing all the relevant instance
+        variables."""
+        state = dict(
             name=self.name,
             correlation=self.correlation,
             variable_param_name=self.variable_param_name,
             variable_param_values=self.variable_param_values,
             desired_output_param=self.desired_output_param,
             desired_output_param_value=self.desired_output_param_value,
             desired_output_param_value_tolerence_percent=self.desired_output_param_value_tolerence_percent,
             desired_output_param_values=self.desired_output_param_values,
+            next_variable_param_value=self.next_variable_param_value,
             sonnet_mesh_size=self.sonnet_mesh_size,
             batch_1_son_filename=self.batch_1_son_filename,
             batch_1_son_file_path=self.batch_1_son_file_path,
             batch_1_output_filename=self.batch_1_output_filename,
             batch_1_output_file_path=self.batch_1_output_file_path,
         )
 
+        state_str = "---\n"
+        for key, val in state.items():
+            if isinstance(val, str):
+                state_str = state_str + f"{key}: '{val}'\n"
+            else:
+                state_str = state_str + f"{key}: {val}\n"
+
+        return state_str
+
     def get_cache_filename_and_path(self) -> str:
         """Get the filename and file path for the optimiser cache file."""
         return os.path.join(self.get_cache_file_path(), self.get_cache_filename())
@@ -264,35 +292,45 @@ def cache_results(self) -> None:
         except Exception as err:
             raise err
 
+        yaml_str = self._create_yaml_str()
         with open(self.get_cache_filename_and_path(), "w+") as yaml_file:
-            yaml.dump(self, yaml_file, default_flow_style=False)
+            yaml_file.write(yaml_str)
 
         return
 
-    def get_cached_results(self) -> None:
-        """Get cached results of the optimiser so far if a cache file
+    def load_cached_results(self) -> None:
+        """Load cached results of the optimiser so far if a cache file
         exists."""
         cache_file = self.get_cache_filename_and_path()
-        if os.path.isfile(cache_file):
-            try:
-                cached_data = yaml.safe_load(cache_file)
-                self.name = cached_data["name"]
-                self.correlation = cached_data["correlation"]
-                self.variable_param_name = cached_data["variable_param_name"]
-                self.variable_param_values = cached_data["variable_param_values"]
-                self.desired_output_param = cached_data["desired_output_param"]
-                self.desired_output_param_value = cached_data["desired_output_param_value"]
-                self.desired_output_param_value_tolerence_percent = cached_data["desired_output_param_value_tolerence_percent"]
-                self.desired_output_param_values = cached_data["desired_output_param_values"]
-                self.sonnet_mesh_size = cached_data["sonnet_mesh_size"]
-                self.batch_1_son_filename = cached_data["batch_1_son_filename"]
-                self.batch_1_son_file_path = cached_data["batch_1_son_file_path"]
-                self.batch_1_output_filename = cached_data["batch_1_output_filename"]
-                self.batch_1_output_file_path = cached_data["batch_1_output_file_path"]
-            except Exception as e:
-                print("Error loading cache")
-                raise e
+
+        if not os.path.isfile(cache_file):
+            self.loaded_cache = False
+            return
+
+        try:
+            with open(cache_file, "r") as stream:
+                cached_data = yaml.safe_load(stream)
+
+            self.name = cached_data.get("name")
+            self.correlation = cached_data.get("correlation")
+            self.variable_param_name = cached_data.get("variable_param_name")
+            self.variable_param_values = cached_data.get("variable_param_values")
+            self.desired_output_param = cached_data.get("desired_output_param")
+            self.desired_output_param_value = cached_data.get("desired_output_param_value")
+            self.desired_output_param_value_tolerence_percent = cached_data.get("desired_output_param_value_tolerence_percent")
+            self.desired_output_param_values = cached_data.get("desired_output_param_values")
+            self.next_variable_param_value = cached_data.get("next_variable_param_value")
+            self.sonnet_mesh_size = cached_data.get("sonnet_mesh_size")
+            self.batch_1_son_filename = cached_data.get("batch_1_son_filename")
+            self.batch_1_son_file_path = cached_data.get("batch_1_son_file_path")
+            self.batch_1_output_filename = cached_data.get("batch_1_output_filename")
+            self.batch_1_output_file_path = cached_data.get("batch_1_output_file_path")
+
+            self.loaded_cache = True
+        except Exception as e:
+            print("Error loading cache")
+            raise e
 
         return
 
@@ -526,6 +564,8 @@ def generate_next_batch(self, override_variable_param_value: float | None = None
             output_file_path=output_file_path,
             params_to_edit=params_to_edit_dict,
         )
+
+        self.cache_results()
         return
 
     def analyze_batch(self) -> None:
@@ -558,7 +598,6 @@ def analyze_batch(self) -> None:
             print("ERROR")
             raise (ValueError(f"Error, cannot optimise for {self.desired_output_param}"))
 
-        self.cache_results()
         return
 
     def plot_optimisation(