Merge branch 'test_meaningful_out_channels' into 'main'
Update NAS experiments

See merge request es/ai/hannah/hannah!346
moreib committed Oct 25, 2023
2 parents 5a7ab42 + ecbd7fc commit aafe42a
Showing 24 changed files with 735 additions and 17 deletions.
43 changes: 43 additions & 0 deletions experiments/embedded_vision_net_ri/config.yaml
@@ -0,0 +1,43 @@
##
## Copyright (c) 2022 University of Tübingen.
##
## This file is part of hannah.
## See https://atreus.informatik.uni-tuebingen.de/ties/ai/hannah/hannah for further info.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
defaults:
- base_config
- experiment: optional
- override dataset: ri_capsule # Dataset configuration name
- override features: identity # Feature extractor configuration name (use identity for vision datasets)
#- override model: timm_mobilenetv3_small_075 # Neural network name (for now timm_resnet50 or timm_efficientnet_lite1)
- override scheduler: 1cycle # learning rate scheduler config name
- override optimizer: adamw # Optimizer config name
- override normalizer: null # Feature normalizer (used for quantized neural networks)
- override module: image_classifier # Lightning module config for the training loop (image classifier for image classification tasks)
- _self_


dataset:
  data_folder: ${oc.env:HANNAH_DATA_FOLDER,${hydra:runtime.cwd}/../../datasets/}

module:
  batch_size: 16
  num_workers: 8

trainer:
  max_epochs: 10

scheduler:
  max_lr: 0.001
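For orientation, a minimal launch sketch (not part of the commit): assuming hannah-train is invoked from this experiment directory so that the config.yaml above is picked up, one of the experiment files added below can be selected on the command line, mirroring the hannah-train invocations in the SLURM scripts further down.

# sketch only: run from experiments/embedded_vision_net_ri/ so this config.yaml is the primary config
hannah-train +experiment=ae_nas_ri trainer.gpus=1 module.batch_size=16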
69 changes: 69 additions & 0 deletions experiments/embedded_vision_net_ri/eval.yaml
@@ -0,0 +1,69 @@
##
## Copyright (c) 2022 University of Tübingen.
##
## This file is part of hannah.
## See https://atreus.informatik.uni-tuebingen.de/ties/ai/hannah/hannah for further info.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
data:
  AE: trained_models/ae_nas_cifar10/embedded_vision_net
  RANDOM: trained_models/random_nas_cifar10_workingbn/embedded_vision_net

metrics:
  total_act:
    name: Activations
  total_weights:
    name: Weights
  weights_m:
    name: Weights [M]
    derived: data["total_weights"] / 1000 / 1000
  val_accuracy:
    name: Accuracy [%]
    derived: (1.0 - data["val_error"]) * 100.0
  act_k:
    name: Activations [k]
    derived: data["total_act"] / 1000
  macs_m:
    name: MACS [M]
    derived: data["total_macs"] / 1000 / 1000

plots:
  # Comparison plots of 2-3 metrics, using y, x, and size as visualization dimensions
  - type: comparison
    name: accuracy_memory
    metrics:
      - val_accuracy
      - weights_m
      - act_k

  - type: comparison
    name: accuracy_macs
    metrics:
      - val_accuracy
      - macs_m

extract:
  AE:
    bounds:
      val_error: 0.20
      total_macs: 100000000
      total_weights: 1000000


experiment: embedded_vision_net_ri
force: false

hydra:
  run:
    dir: ./nas_results/${experiment}
21 changes: 21 additions & 0 deletions experiments/embedded_vision_net_ri/experiment/ae_nas_cifar10.yaml
@@ -0,0 +1,21 @@
# @package _global_
defaults:
- override /nas: aging_evolution_nas
- override /model: embedded_vision_nas
- override /dataset: cifar10

model:
  num_classes: 10
module:
  batch_size: 128
nas:
  budget: 300
  n_jobs: 8


trainer:
  max_epochs: 10

seed: [1234]

experiment_id: "ae_nas_cifar10"
@@ -0,0 +1,21 @@
# @package _global_
defaults:
- override /nas: aging_evolution_nas
- override /model: embedded_vision_nas
- override /dataset: cifar10

model:
  num_classes: 10
module:
  batch_size: 128
nas:
  budget: 600
  n_jobs: 8


trainer:
  max_epochs: 10

seed: [1234]

experiment_id: "ae_nas_cifar10_v2"
18 changes: 18 additions & 0 deletions experiments/embedded_vision_net_ri/experiment/ae_nas_ri.yaml
@@ -0,0 +1,18 @@
# @package _global_
defaults:
- override /nas: aging_evolution_nas
- override /model: embedded_vision_net


nas:
  budget: 300
  n_jobs: 8
  presample: False


trainer:
  max_epochs: 10

seed: [1234]

experiment_id: "ae_nas_ri"
17 changes: 17 additions & 0 deletions experiments/embedded_vision_net_ri/experiment/random_nas.yaml
@@ -0,0 +1,17 @@
# @package _global_
defaults:
- override /nas: random_nas
- override /model: embedded_vision_net


nas:
  budget: 300
  n_jobs: 8


trainer:
  max_epochs: 10

seed: [1234]

experiment_id: "random_nas"
@@ -2,17 +2,20 @@
defaults:
- override /nas: random_nas
- override /model: embedded_vision_nas
- override /dataset: cifar10


model:
num_classes: 10
module:
batch_size: 128
nas:
budget: 100
budget: 300
n_jobs: 8
presample: False


trainer:
max_epochs: 10

seed: [1234]

experiment_id: "random_nas_log_saving"
experiment_id: "random_nas_cifar10_workingbn"
@@ -0,0 +1,23 @@
# @package _global_
defaults:
- override /nas: random_nas
- override /model: embedded_vision_nas
- override /dataset: cifar10

model:
  num_classes: 10
module:
  batch_size: 128
nas:
  budget: 300
  n_jobs: 8
  predictor:
    model:
      input_feature_size: 30

trainer:
  max_epochs: 10

seed: [1234]

experiment_id: "random_nas_cifar10_nopool"
@@ -0,0 +1,26 @@
##
## Copyright (c) 2022 University of Tübingen.
##
## This file is part of hannah.
## See https://atreus.informatik.uni-tuebingen.de/ties/ai/hannah/hannah for further info.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
defaults:
- submitit_slurm
- _self_

timeout_min: 3600
gpus_per_task: 4
cpus_per_gpu: 8
partition: gpu-2080ti
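This looks like a Hydra submitit launcher override; its file path is not rendered in this diff, so the config group name used below is hypothetical. If the file is registered under the hydra/launcher group, a multirun could presumably be dispatched to SLURM along these lines:

# sketch only: 'submitit_remote' is a placeholder name for this launcher config
hannah-train -m hydra/launcher=submitit_remote +experiment=ae_nas_ri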
104 changes: 104 additions & 0 deletions experiments/embedded_vision_net_ri/result_exploration.ipynb

Large diffs are not rendered by default.

65 changes: 65 additions & 0 deletions experiments/embedded_vision_net_ri/scripts/experiment_slurm.sh
@@ -0,0 +1,65 @@
#!/bin/bash
##
## Copyright (c) 2023 Hannah contributors.
##
## This file is part of hannah.
## See https://github.com/ekut-es/hannah for further info.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##


#SBATCH --job-name=ri_random_nas

#resources:

#SBATCH --partition=gpu-2080ti
# the slurm partition the job is queued to.
# FIXME: test if preemptable is available

#SBATCH --nodes=1
# requests that the cores are all on one node

#SBATCH --gres=gpu:rtx2080ti:2
# the job can use and see 2 GPUs (8 GPUs are available in total on one node)

#SBATCH --time=4320
# the maximum time the script needs to run (4320 minutes = 72 hours)

#SBATCH --error=jobs/%j.err
# write the error output to job.*jobID*.err

#SBATCH --output=jobs/%j.out
# write the standard output to jobs/*jobID*.out

#SBATCH --mail-type=ALL
#write a mail if a job begins, ends, fails, gets requeued or stages out

#SBATCH [email protected]
# your mail address


#Script
echo "Job information"
scontrol show job $SLURM_JOB_ID


export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/

GPUS=2
BATCH_SIZE=32

# trainer=sharded

hannah-train +experiment=$1 model=embedded_vision_net trainer.gpus=${GPUS} module.batch_size=${BATCH_SIZE} module.num_workers=16
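The script takes the experiment name as its first positional argument, so a submission from the experiment directory could look roughly like the sketch below (the jobs/ directory is assumed to exist, since the --error/--output paths point into it):

# sketch of a submission from experiments/embedded_vision_net_ri/
mkdir -p jobs
sbatch scripts/experiment_slurm.sh random_nas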
@@ -0,0 +1,62 @@
#!/bin/bash
##
## Copyright (c) 2023 Hannah contributors.
##
## This file is part of hannah.
## See https://github.com/ekut-es/hannah for further info.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##


#SBATCH --job-name=run-random_nas

#resources:

#SBATCH --partition=gpu-2080ti
# the slurm partition the job is queued to.
# FIXME: test if preemptable is available

#SBATCH --nodes=1
# requests that the cores are all on one node

#SBATCH --gres=gpu:rtx2080ti:8
# the job can use and see 8 GPUs (8 GPUs are available in total on one node)

#SBATCH --time=4320
# the maximum time the script needs to run (4320 minutes = 72 hours)

#SBATCH --error=jobs/%j.err
# write the error output to job.*jobID*.err

#SBATCH --output=jobs/%j.out
# write the standard output to jobs/*jobID*.out

#SBATCH --mail-type=ALL
#write a mail if a job begins, ends, fails, gets requeued or stages out

#SBATCH [email protected]
# your mail address


#Script
echo "Job information"
scontrol show job $SLURM_JOB_ID



# export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/
conda activate hannah


hannah-train trainer.gpus=8 experiment=ae_nas_cifar10_v2 model=embedded_vision_net dataset=cifar10 model.num_classes=10 nas.n_jobs=8 fx_mac_summary=True ~normalizer
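For a quick smoke test before queueing the full search, the same command can presumably be run interactively with a reduced budget and epoch count (a sketch using Hydra-style dotted overrides for values that appear in the experiment configs above; nas.budget and trainer.max_epochs are assumed override paths):

# sketch only: reduced-budget smoke test of the same experiment
hannah-train experiment=ae_nas_cifar10_v2 model=embedded_vision_net dataset=cifar10 model.num_classes=10 nas.budget=10 nas.n_jobs=1 trainer.max_epochs=1 trainer.gpus=1 fx_mac_summary=True ~normalizer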