BootstrapNAS External Weight Reorg. (#2234)

### Changes

Allows the use of external weight importance information for reordering the weights of the super-network. Adds missing info to the experimental schema for the previously committed KD (knowledge distillation).

### Reason for changes

Several advanced algorithms can produce weight importance information that outperforms the L1/L2 weight reordering strategies. This PR allows the use of external weight importance information to reorder the weights in the super-network.

### Related tickets

N/A

### Tests

Tests have been included.

---------

Co-authored-by: Yuan Jinjie <[email protected]>
openvinotoolkit · Nov 10, 2023 · d5e3942 · d5e3942
1 parent 8b46c0c
commit d5e3942
Show file tree

Hide file tree

Showing 6 changed files with 249 additions and 29 deletions.
diff --git a/nncf/config/schemata/experimental_schema.py b/nncf/config/schemata/experimental_schema.py
@@ -13,13 +13,15 @@
 
 from nncf.config.definitions import BOOTSTRAP_NAS_ALGO_NAME_IN_CONFIG
 from nncf.config.definitions import EXPERIMENTAL_QUANTIZATION_ALGO_NAME_IN_CONFIG
+from nncf.config.definitions import KNOWLEDGE_DISTILLATION_ALGO_NAME_IN_CONFIG
 from nncf.config.definitions import MOVEMENT_SPARSITY_ALGO_NAME_IN_CONFIG
 from nncf.config.schemata.algo.quantization import QUANTIZATION_SCHEMA
 from nncf.config.schemata.basic import ARRAY_OF_NUMBERS
 from nncf.config.schemata.basic import ARRAY_OF_STRINGS
 from nncf.config.schemata.basic import BOOLEAN
 from nncf.config.schemata.basic import NUMBER
 from nncf.config.schemata.basic import STRING
+from nncf.config.schemata.basic import make_object_or_array_of_objects_schema
 from nncf.config.schemata.basic import make_string_or_array_of_strings_schema
 from nncf.config.schemata.basic import with_attributes
 from nncf.config.schemata.common.compression import BASIC_COMPRESSION_ALGO_SCHEMA
@@ -115,9 +117,21 @@
         "filter_importance": with_attributes(
             STRING,
             description="The type of filter importance metric. Can be"
-            " one of `L1`, `L2`, `geometric_median`."
+            " one of `L1`, `L2`, `geometric_median`, `external`."
             " `L2` by default.",
         ),
+        "external_importance_path": with_attributes(
+            STRING,
+            description="Path to the custom external weight importance (PyTorch tensor) per node "
+            "that needs to weight reorder. Valid only when filter_importance "
+            "is `external`. The file should be loaded via the torch interface "
+            "torch.load(), represented as a dictionary. It maps NNCF node name "
+            "to importance tensor with the same shape as the weights in the node "
+            "module. For example, node `Model/NNCFLinear[fc1]/linear_0` has a "
+            "3x1 linear module with weight [0.2, 0.3, 0.9], and in the dict"
+            "{'Model/NNCFLinear[fc1]/linear_0': tensor([0.4, 0.01, 0.2])} represents "
+            "the corresponding weight importance.",
+        ),
     },
     "additionalProperties": False,
 }
@@ -148,10 +162,12 @@
             "are available - [width, depth, kernel]",
         ),
         "ignored_scopes": with_attributes(
-            make_string_or_array_of_strings_schema(), description=IGNORED_SCOPES_DESCRIPTION
+            make_string_or_array_of_strings_schema(),
+            description=IGNORED_SCOPES_DESCRIPTION,
         ),
         "target_scopes": with_attributes(
-            make_string_or_array_of_strings_schema(), description=TARGET_SCOPES_DESCRIPTION
+            make_string_or_array_of_strings_schema(),
+            description=TARGET_SCOPES_DESCRIPTION,
         ),
     },
     "additionalProperties": False,
@@ -194,7 +210,8 @@
             "beginning of the stage",
         ),
         "bn_adapt": with_attributes(
-            BOOLEAN, description="if True, triggers batchnorm adaptation in the beginning of the stage"
+            BOOLEAN,
+            description="if True, triggers batchnorm adaptation in the beginning of the stage",
         ),
         "init_lr": with_attributes(
             NUMBER,
@@ -203,10 +220,12 @@
             "the beginning of the stage.",
         ),
         "epochs_lr": with_attributes(
-            NUMBER, description="Number of epochs to compute the adjustment of the learning rate."
+            NUMBER,
+            description="Number of epochs to compute the adjustment of the learning rate.",
         ),
         "sample_rate": with_attributes(
-            NUMBER, description="Number of iterations to activate the random subnet. Default value is 1."
+            NUMBER,
+            description="Number of iterations to activate the random subnet. Default value is 1.",
         ),
     },
     "description": "Defines a supernet training stage: how many epochs it takes, which elasticities with which "
@@ -260,7 +279,8 @@
         "elasticity": ELASTICITY_SCHEMA,
         "lr_schedule": LR_SCHEDULE_SCHEMA,
         "train_steps": with_attributes(
-            NUMBER, description="Defines the number of samples used for each training epoch."
+            NUMBER,
+            description="Defines the number of samples used for each training epoch.",
         ),
     },
     "additionalProperties": False,
@@ -275,14 +295,17 @@
     "type": "object",
     "properties": {
         "algorithm": with_attributes(
-            SEARCH_ALGORITHMS_SCHEMA, description="Defines the search algorithm. Default algorithm is NSGA-II."
+            SEARCH_ALGORITHMS_SCHEMA,
+            description="Defines the search algorithm. Default algorithm is NSGA-II.",
         ),
         "batchnorm_adaptation": BATCHNORM_ADAPTATION_SCHEMA,
         "num_evals": with_attributes(
-            NUMBER, description="Defines the number of evaluations that will be used by the search algorithm."
+            NUMBER,
+            description="Defines the number of evaluations that will be used by the search algorithm.",
         ),
         "population": with_attributes(
-            NUMBER, description="Defines the population size when using an evolutionary search algorithm."
+            NUMBER,
+            description="Defines the population size when using an evolutionary search algorithm.",
         ),
         "acc_delta": with_attributes(
             NUMBER,
@@ -294,6 +317,9 @@
             description="Defines the reference accuracy from the pre-trained model used "
             "to generate the super-network.",
         ),
+        "compression": make_object_or_array_of_objects_schema(
+            {"oneOf": [{"$ref": f"#/$defs/{KNOWLEDGE_DISTILLATION_ALGO_NAME_IN_CONFIG}"}]}
+        ),
     },
     "additionalProperties": False,
 }
@@ -326,10 +352,12 @@
             enum=MOVEMENT_SPARSE_STRUCTURE_MODE,
         ),
         "sparse_factors": with_attributes(
-            ARRAY_OF_NUMBERS, description='The block shape for weights to sparsify. Required when `mode`="block".'
+            ARRAY_OF_NUMBERS,
+            description='The block shape for weights to sparsify. Required when `mode`="block".',
         ),
         "axis": with_attributes(
-            NUMBER, description='The dimension for weights to sparsify. Required when `mode`="per_dim".'
+            NUMBER,
+            description='The dimension for weights to sparsify. Required when `mode`="per_dim".',
         ),
         "target_scopes": with_attributes(
             make_string_or_array_of_strings_schema(),
@@ -344,7 +372,8 @@
     "type": "object",
     "properties": {
         "warmup_start_epoch": with_attributes(
-            NUMBER, description="Index of the starting epoch (include) for warmup stage."
+            NUMBER,
+            description="Index of the starting epoch (include) for warmup stage.",
         ),
         "warmup_end_epoch": with_attributes(NUMBER, description="Index of the end epoch (exclude) for warmup stage."),
         "importance_regularization_factor": with_attributes(
@@ -386,7 +415,11 @@
         ),
     },
     "additionalProperties": False,
-    "required": ["warmup_start_epoch", "warmup_end_epoch", "importance_regularization_factor"],
+    "required": [
+        "warmup_start_epoch",
+        "warmup_end_epoch",
+        "importance_regularization_factor",
+    ],
 }