Merge branch 'test_meaningful_out_channels' into 'main'
Update NAS experiments

See merge request es/ai/hannah/hannah!346
moreib committed Oct 25, 2023
2 parents 5a7ab42 + ecbd7fc commit aafe42a
Showing 24 changed files with 735 additions and 17 deletions.
43 changes: 43 additions & 0 deletions experiments/embedded_vision_net_ri/config.yaml
@@ -0,0 +1,43 @@
##
## Copyright (c) 2022 University of Tübingen.
##
## This file is part of hannah.
## See https://atreus.informatik.uni-tuebingen.de/ties/ai/hannah/hannah for further info.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
defaults:
- base_config
- experiment: optional
- override dataset: ri_capsule # Dataset configuration name
- override features: identity # Feature extractor configuration name (use identity for vision datasets)
#- override model: timm_mobilenetv3_small_075 # Neural network name (for now timm_resnet50 or timm_efficientnet_lite1)
- override scheduler: 1cycle # learning rate scheduler config name
- override optimizer: adamw # Optimizer config name
- override normalizer: null # Feature normalizer (used for quantized neural networks)
- override module: image_classifier # Lightning module config for the training loop (image classifier for image classification tasks)
- _self_


dataset:
  data_folder: ${oc.env:HANNAH_DATA_FOLDER,${hydra:runtime.cwd}/../../datasets/}

module:
  batch_size: 16
  num_workers: 8

trainer:
  max_epochs: 10

scheduler:
  max_lr: 0.001
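For orientation, a minimal launch sketch (not part of the commit): assuming hannah-train is invoked from this experiment directory so that the config.yaml above is picked up, one of the experiment files added below can be selected on the command line, mirroring the hannah-train invocations in the SLURM scripts further down.

# sketch only: run from experiments/embedded_vision_net_ri/ so this config.yaml is the primary config
hannah-train +experiment=ae_nas_ri trainer.gpus=1 module.batch_size=16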
69 changes: 69 additions & 0 deletions experiments/embedded_vision_net_ri/eval.yaml
@@ -0,0 +1,69 @@
##
## Copyright (c) 2022 University of Tübingen.
##
## This file is part of hannah.
## See https://atreus.informatik.uni-tuebingen.de/ties/ai/hannah/hannah for further info.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
data:
  AE: trained_models/ae_nas_cifar10/embedded_vision_net
  RANDOM: trained_models/random_nas_cifar10_workingbn/embedded_vision_net

metrics:
  total_act:
    name: Activations
  total_weights:
    name: Weights
  weights_m:
    name: Weights [M]
    derived: data["total_weights"] / 1000 / 1000
  val_accuracy:
    name: Accuracy [%]
    derived: (1.0 - data["val_error"]) * 100.0
  act_k:
    name: Activations [k]
    derived: data["total_act"] / 1000
  macs_m:
    name: MACS [M]
    derived: data["total_macs"] / 1000 / 1000

plots:
  # Comparison plots of 2-3 metrics, using y, x, and size as visualization dimensions
  - type: comparison
    name: accuracy_memory
    metrics:
      - val_accuracy
      - weights_m
      - act_k

  - type: comparison
    name: accuracy_macs
    metrics:
      - val_accuracy
      - macs_m

extract:
  AE:
    bounds:
      val_error: 0.20
      total_macs: 100000000
      total_weights: 1000000


experiment: embedded_vision_net_ri
force: false

hydra:
  run:
    dir: ./nas_results/${experiment}
21 changes: 21 additions & 0 deletions experiments/embedded_vision_net_ri/experiment/ae_nas_cifar10.yaml
@@ -0,0 +1,21 @@
# @package _global_
defaults:
- override /nas: aging_evolution_nas
- override /model: embedded_vision_nas
- override /dataset: cifar10

model:
  num_classes: 10
module:
  batch_size: 128
nas:
  budget: 300
  n_jobs: 8


trainer:
  max_epochs: 10

seed: [1234]

experiment_id: "ae_nas_cifar10"
@@ -0,0 +1,21 @@
# @package _global_
defaults:
- override /nas: aging_evolution_nas
- override /model: embedded_vision_nas
- override /dataset: cifar10

model:
  num_classes: 10
module:
  batch_size: 128
nas:
  budget: 600
  n_jobs: 8


trainer:
  max_epochs: 10

seed: [1234]

experiment_id: "ae_nas_cifar10_v2"
18 changes: 18 additions & 0 deletions experiments/embedded_vision_net_ri/experiment/ae_nas_ri.yaml
@@ -0,0 +1,18 @@
# @package _global_
defaults:
- override /nas: aging_evolution_nas
- override /model: embedded_vision_net


nas:
  budget: 300
  n_jobs: 8
  presample: False


trainer:
  max_epochs: 10

seed: [1234]

experiment_id: "ae_nas_ri"
17 changes: 17 additions & 0 deletions experiments/embedded_vision_net_ri/experiment/random_nas.yaml
@@ -0,0 +1,17 @@
# @package _global_
defaults:
- override /nas: random_nas
- override /model: embedded_vision_net


nas:
  budget: 300
  n_jobs: 8


trainer:
  max_epochs: 10

seed: [1234]

experiment_id: "random_nas"
@@ -2,17 +2,20 @@
defaults:
- override /nas: random_nas
- override /model: embedded_vision_nas
- override /dataset: cifar10


model:
num_classes: 10
module:
batch_size: 128
nas:
budget: 100
budget: 300
n_jobs: 8
presample: False


trainer:
max_epochs: 10

seed: [1234]

experiment_id: "random_nas_log_saving"
experiment_id: "random_nas_cifar10_workingbn"
@@ -0,0 +1,23 @@
# @package _global_
defaults:
- override /nas: random_nas
- override /model: embedded_vision_nas
- override /dataset: cifar10

model:
  num_classes: 10
module:
  batch_size: 128
nas:
  budget: 300
  n_jobs: 8
  predictor:
    model:
      input_feature_size: 30

trainer:
  max_epochs: 10

seed: [1234]

experiment_id: "random_nas_cifar10_nopool"
@@ -0,0 +1,26 @@
##
## Copyright (c) 2022 University of Tübingen.
##
## This file is part of hannah.
## See https://atreus.informatik.uni-tuebingen.de/ties/ai/hannah/hannah for further info.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
defaults:
- submitit_slurm
- _self_

timeout_min: 3600
gpus_per_task: 4
cpus_per_gpu: 8
partition: gpu-2080ti
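This looks like a Hydra submitit launcher override; its file path is not rendered in this diff, so the config group name used below is hypothetical. If the file is registered under the hydra/launcher group, a multirun could presumably be dispatched to SLURM along these lines:

# sketch only: 'submitit_remote' is a placeholder name for this launcher config
hannah-train -m hydra/launcher=submitit_remote +experiment=ae_nas_ri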
104 changes: 104 additions & 0 deletions experiments/embedded_vision_net_ri/result_exploration.ipynb

Large diffs are not rendered by default.

65 changes: 65 additions & 0 deletions experiments/embedded_vision_net_ri/scripts/experiment_slurm.sh
@@ -0,0 +1,65 @@
#!/bin/bash
##
## Copyright (c) 2023 Hannah contributors.
##
## This file is part of hannah.
## See https://github.com/ekut-es/hannah for further info.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##


#SBATCH --job-name=ri_random_nas

#resources:

#SBATCH --partition=gpu-2080ti
# the slurm partition the job is queued to.
# FIXME: test if preemptable is available

#SBATCH --nodes=1
# requests that the cores are all on one node

#SBATCH --gres=gpu:rtx2080ti:2
# the job can use and see 2 GPUs (8 GPUs are available in total on one node)

#SBATCH --time=4320
# the maximum time the script needs to run (4320 minutes = 72 hours)

#SBATCH --error=jobs/%j.err
# write the error output to job.*jobID*.err

#SBATCH --output=jobs/%j.out
# write the standard output to jobs/*jobID*.out

#SBATCH --mail-type=ALL
#write a mail if a job begins, ends, fails, gets requeued or stages out

#SBATCH [email protected]
# your mail address


#Script
echo "Job information"
scontrol show job $SLURM_JOB_ID


export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/

GPUS=2
BATCH_SIZE=32

# trainer=sharded

hannah-train +experiment=$1 model=embedded_vision_net trainer.gpus=${GPUS} module.batch_size=${BATCH_SIZE} module.num_workers=16
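The script takes the experiment name as its first positional argument, so a submission from the experiment directory could look roughly like the sketch below (the jobs/ directory is assumed to exist, since the --error/--output paths point into it):

# sketch of a submission from experiments/embedded_vision_net_ri/
mkdir -p jobs
sbatch scripts/experiment_slurm.sh random_nas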
@@ -0,0 +1,62 @@
#!/bin/bash
##
## Copyright (c) 2023 Hannah contributors.
##
## This file is part of hannah.
## See https://github.com/ekut-es/hannah for further info.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##


#SBATCH --job-name=run-random_nas

#resources:

#SBATCH --partition=gpu-2080ti
# the slurm partition the job is queued to.
# FIXME: test if preemptable is available

#SBATCH --nodes=1
# requests that the cores are all on one node

#SBATCH --gres=gpu:rtx2080ti:8
# the job can use and see 8 GPUs (8 GPUs are available in total on one node)

#SBATCH --time=4320
# the maximum time the script needs to run (4320 minutes = 72 hours)

#SBATCH --error=jobs/%j.err
# write the error output to job.*jobID*.err

#SBATCH --output=jobs/%j.out
# write the standard output to jobs/*jobID*.out

#SBATCH --mail-type=ALL
#write a mail if a job begins, ends, fails, gets requeued or stages out

#SBATCH [email protected]
# your mail address


#Script
echo "Job information"
scontrol show job $SLURM_JOB_ID



# export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/
conda activate hannah


hannah-train trainer.gpus=8 experiment=ae_nas_cifar10_v2 model=embedded_vision_net dataset=cifar10 model.num_classes=10 nas.n_jobs=8 fx_mac_summary=True ~normalizer
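For a quick smoke test before queueing the full search, the same command can presumably be run interactively with a reduced budget and epoch count (a sketch using Hydra-style dotted overrides for values that appear in the experiment configs above; nas.budget and trainer.max_epochs are assumed override paths):

# sketch only: reduced-budget smoke test of the same experiment
hannah-train experiment=ae_nas_cifar10_v2 model=embedded_vision_net dataset=cifar10 model.num_classes=10 nas.budget=10 nas.n_jobs=1 trainer.max_epochs=1 trainer.gpus=1 fx_mac_summary=True ~normalizer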