Merge branch 'f/1d_composable_nas' into 'main'

KWS example NAS See merge request es/ai/hannah/hannah!370
ekut-es · Feb 12, 2024 · 70fb026 · 70fb026
2 parents fd0e2a9 + d7d6415
commit 70fb026
Show file tree

Hide file tree

Showing 19 changed files with 241 additions and 189 deletions.
diff --git a/environment.yml b/environment.yml
diff --git a/experiments/kws/README.md b/experiments/kws/README.md
@@ -0,0 +1,32 @@
+# Example of using new FlexKI hannah search spaces for new models
+
+The new search spaces for hannah are intended to make it easy to express neural network search spaces with very large variability.
+
+This is a very simple example of using them to search for a 1D convolutional network.
+
+There are 3 options to run the training in this folder. 
+
+### Normal Neural Network Training
+
+```bash
+hannah-train
+```
+
+This will train a single neural network.
+
+
+### NAS on a flexible search space definition
+
+```bash
+hannah-train +experiment=ae_nas
+```
+
+This will use the flexible search space definition and run a direct NAS with an aging-evolution-based optimizer.
+
+### Legacy NAS using fixed search spaces
+
+```bash
+hannah-train +experiment=legacy_nas
+```
+
+This uses the legacy/original Hannah search spaces as defined in the paper.
diff --git a/experiments/kws/config.yaml b/experiments/kws/config.yaml
@@ -19,11 +19,15 @@
 
 
 defaults:
-    - base_config
-    - _self_
+    - base_config  # Base configuration uses a single neural network training and kws dataset
+    - _self_ # This is a special value that specifies that values defined in this file take precedence over values from the other files
 
-module:
-  num_workers: 8
+module: # The module encapsulates the target task for neural network training; in this case we use the default task, which is classification on 1D signals
+  num_workers: 32  # Number of workers gives the number of parallel processes used to load data
+  batch_size: 1024
 
-trainer:
-  max_epochs: 30
+trainer:                   # Trainer arguments set hyperparameters for all trainings
+  max_epochs: 30 
+
+dataset:
+  data_folder: ${oc.env:HANNAH_DATA_FOLDER,${hydra:runtime.cwd}/../../datasets/}  # Set the location for dataset files; in this case we will use the value of the environment variable HANNAH_DATA_FOLDER, or the folder ../../datasets/ relative to the directory where hannah-train is run (usually the folder containing this file)
diff --git a/experiments/kws/experiment/ae_nas.yaml b/experiments/kws/experiment/ae_nas.yaml
@@ -0,0 +1,16 @@
+# @package _global_  
+# The preceding line specifies that the following configuration changes global configuration settings instead of settings in the experiment namespace
+
+defaults:
+  - override /nas: aging_evolution_nas
+  - override /model: 1d_space
+
+experiment_id: ae_nas   # The experiment id is used to identify the experiment; in particular, it defines the subfolder under /trained_models where the results will be saved
+
+nas:
+  predictor: null
+  bounds: 
+    val_error: 0.08
+    total_macs: 250000
+  budget: 250
+  input_shape: [40,101] 
diff --git a/experiments/kws/experiment/legacy_nas.yaml b/experiments/kws/experiment/legacy_nas.yaml
@@ -0,0 +1,37 @@
+# @package _global_  
+# The preceding line specifies that the following configuration changes global configuration settings instead of settings in the experiment namespace
+
+defaults:
+  - override /nas: aging_evolution_nas_legacy
+
+experiment_id: legacy_nas   # The experiment id is used to identify the experiment; in particular, it defines the subfolder under /trained_models where the results will be saved
+
+
+nas:
+  parametrization: 
+    model:
+      qconfig:
+        config:
+          bw_f: [4,8]
+          bw_w: [2,4,8]
+      conv:
+        min: 1
+        max: 2
+
+        choices:
+          - target: forward
+            stride: [1,2]
+            blocks:
+              min: 1
+              max: 4
+              choices:
+                - target: conv1d
+                  kernel_size: [3,5,7]
+                  act: true
+                  norm: true
+                  out_channels: [8,16,32,64]
+
+  bounds: 
+    val_error: 0.08
+    total_macs: 250000
+  budget: 250
diff --git a/experiments/kws/model/1d_space.yaml b/experiments/kws/model/1d_space.yaml
@@ -0,0 +1,5 @@
+_target_: hannah.models.simple1d.space
+name: simple1d_searchspace
+num_classes: 12
+max_channels: 256
+max_depth: 4
diff --git a/experiments/progressive_shrinking/config.yaml b/experiments/progressive_shrinking/config.yaml
diff --git a/experiments/progressive_shrinking/experiment/devel.yaml b/experiments/progressive_shrinking/experiment/devel.yaml
diff --git a/experiments/progressive_shrinking/experiment/finetune_float.yaml b/experiments/progressive_shrinking/experiment/finetune_float.yaml
diff --git a/experiments/progressive_shrinking/experiment/overfit.yaml b/experiments/progressive_shrinking/experiment/overfit.yaml
diff --git a/experiments/progressive_shrinking/experiment/shrink_float.yaml b/experiments/progressive_shrinking/experiment/shrink_float.yaml
diff --git a/hannah/callbacks/summaries.py b/hannah/callbacks/summaries.py
@@ -29,7 +29,7 @@
 from tabulate import tabulate
 from torch.fx.graph_module import GraphModule
 
-from hannah.nas.functional_operators.operators import add, conv2d, linear
+from hannah.nas.functional_operators.operators import add, conv2d, linear, conv1d
 from hannah.nas.graph_conversion import GraphConversionTracer
 
 from ..models.factory import qat
@@ -337,7 +337,7 @@ def _do_summary(self, pl_module, input=None, print_log=True):
                 msglogger.info(
                     "Estimated Activations: " + "{:,}".format(estimated_acts)
                 )
-        except RuntimeError as e:
+        except (RuntimeError, KeyError) as e:
             msglogger.warning("Could not create performance summary: %s", str(e))
             return OrderedDict()
 
@@ -494,6 +494,7 @@ def __init__(self, module: torch.nn.Module):
         super().__init__(gm)
 
         self.count_function = {
+            conv1d: get_conv,
             conv2d: get_conv,
             linear: get_linear,
             add: get_zero_op,
@@ -514,6 +515,7 @@ def __init__(self, module: torch.nn.Module):
     def run_node(self, n: torch.fx.Node):
         try:
             out = super().run_node(n)
+            print(out.shape, n)
         except Exception as e:
             print(str(e))
         if n.op == "call_function":
@@ -533,13 +535,15 @@ def run_node(self, n: torch.fx.Node):
                 self.data["MACs"] += [int(macs)]
             except Exception as e:
                 msglogger.warning("Summary of node %s failed: %s", n.name, str(e))
+                print(traceback.format_exc())   
         return out
 
 
 class FxMACSummaryCallback(MacSummaryCallback):
     def _do_summary(self, pl_module, input=None, print_log=True):
         interpreter = MACSummaryInterpreter(pl_module.model)
         dummy_input = input
+
         if dummy_input is None:
             dummy_input = pl_module.example_feature_array
         dummy_input = dummy_input.to(pl_module.device)

diff --git a/hannah/models/embedded_vision_net/blocks.py b/hannah/models/embedded_vision_net/blocks.py
@@ -28,10 +28,10 @@ def grouped_pointwise(input, out_channels):
 
 @scope
 def expansion(input, expanded_channels):
-    pw = partial(pointwise_conv2d, out_channels=expanded_channels)
-    grouped_pw = partial(grouped_pointwise, out_channels=expanded_channels)
-    return choice(input, pw, grouped_pw)
-    # return pointwise_conv2d(input, out_channels=expanded_channels)
+    #pw = partial(pointwise_conv2d, out_channels=expanded_channels)
+    #grouped_pw = partial(grouped_pointwise, out_channels=expanded_channels)
+    #return choice(input, pw, grouped_pw)
+    return pointwise_conv2d(input, out_channels=expanded_channels)
 
 
 @scope
@@ -41,10 +41,10 @@ def spatial_correlation(input, out_channels, kernel_size, stride=1):
 
 @scope
 def reduction(input, out_channels):
-    pw = partial(pointwise_conv2d, out_channels=out_channels)
-    grouped_pw = partial(grouped_pointwise, out_channels=out_channels)
-    return choice(input, pw, grouped_pw)
-    # return pointwise_conv2d(input, out_channels=out_channels)
+    #pw = partial(pointwise_conv2d, out_channels=out_channels)
+    #grouped_pw = partial(grouped_pointwise, out_channels=out_channels)
+    #return choice(input, pw, grouped_pw)
+    return pointwise_conv2d(input, out_channels=out_channels)
 
 
 @scope
@@ -86,9 +86,9 @@ def pattern(input, stride, out_channels, kernel_size, expand_ratio, reduce_ratio
     convolution = partial(conv_relu, stride=stride, kernel_size=kernel_size, out_channels=out_channels)
     red_exp = partial(reduce_expand, out_channels=out_channels, reduce_ratio=reduce_ratio, kernel_size=kernel_size, stride=stride)
     exp_red = partial(expand_reduce, out_channels=out_channels, expand_ratio=expand_ratio, kernel_size=kernel_size, stride=stride)
-    pool = partial(pooling, kernel_size=kernel_size, stride=stride)
+    #pool = partial(pooling, kernel_size=kernel_size, stride=stride)
 
-    out = choice(input, convolution, exp_red, red_exp, pool)
+    out = choice(input, convolution, exp_red, red_exp)
     return out