fix lookup and add docs

axolotl-ai-cloud · Dec 26, 2024 · 82c62cb · 82c62cb
1 parent 32ab034
commit 82c62cb
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 1 deletion.
diff --git a/docs/lr_groups.qmd b/docs/lr_groups.qmd
@@ -0,0 +1,29 @@
+---
+title: Learning Rate Groups
+description: "Setting different learning rates by module name"
+---
+
+## Background
+
+Inspired by LoRA+, Axolotl allows practitioners to specify separate learning rates for individual modules or groups of
+modules in a model.
+
+## Example
+
+```yaml
+lr_groups:
+  - name: o_proj
+    modules:
+      - self_attn.o_proj.weight
+    lr: 1e-6
+  - name: q_proj
+    modules:
+      - model.layers.2.self_attn.q_proj.weight
+    lr: 1e-5
+
+learning_rate: 2e-5
+```
+
+In this example, we have a default learning rate of 2e-5 across the entire model, but we have a separate learning rate
+of 1e-6 for all the self attention `o_proj` modules across all layers, and a learning rate of 1e-5 for the 3rd layer's
+self attention `q_proj` module.
diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py
@@ -495,7 +495,12 @@ def create_optimizer_grouped_parameters(self, opt_model, optimizer_kwargs):
                 if lr_groups_lookup and any(
                     group_modules in name for group_modules in lr_groups_lookup
                 ):
-                    group_name = lr_groups_lookup[name]
+                    lr_group_module = [
+                        group_modules
+                        for group_modules in lr_groups_lookup
+                        if group_modules in name
+                    ][0]
+                    group_name = lr_groups_lookup[lr_group_module]
                     params[f"to_weight_decay_{group_name}"][name] = param
                 else:
                     params["to_weight_decay"][name] = param