From ac5667471e24434c378781c5400b19d595d05fd8 Mon Sep 17 00:00:00 2001
From: Shivam Sahni
Date: Mon, 16 Dec 2024 22:01:19 -0800
Subject: [PATCH] fix: correct typos in docstrings (#482)

- Fix 'transfomers' to 'transformers' in mixtral.py
- Fix 'Emebedding' to 'Embedding' in orpo_trainer.py

## Summary

## Testing Done

- Hardware Type:
- [ ] run `make test` to ensure correctness
- [ ] run `make checkstyle` to ensure code style
- [ ] run `make test-convergence` to ensure convergence

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: byhsu@linkedin.com
---
 src/liger_kernel/transformers/model/mixtral.py        | 2 +-
 src/liger_kernel/transformers/trainer/orpo_trainer.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/liger_kernel/transformers/model/mixtral.py b/src/liger_kernel/transformers/model/mixtral.py
index 22fea53da..145bc78cd 100644
--- a/src/liger_kernel/transformers/model/mixtral.py
+++ b/src/liger_kernel/transformers/model/mixtral.py
@@ -38,7 +38,7 @@ def lce_forward_deprecated(
     cache_position: Optional[torch.LongTensor] = None,
 ) -> Union[Tuple, MoeCausalLMOutputWithPast]:
     r"""
-    Copy paste Mixtral's forward from transfomers v4.44.2 but replace torch cross entropy with liger fused linear cross entropy
+    Copy paste Mixtral's forward from transformers v4.44.2 but replace torch cross entropy with liger fused linear cross entropy

     Args:
diff --git a/src/liger_kernel/transformers/trainer/orpo_trainer.py b/src/liger_kernel/transformers/trainer/orpo_trainer.py
index 184430ac1..04391fa5f 100644
--- a/src/liger_kernel/transformers/trainer/orpo_trainer.py
+++ b/src/liger_kernel/transformers/trainer/orpo_trainer.py
@@ -17,7 +17,7 @@ class _FSDPForwardRedirection:
     This is needed in cases where we call a submodule of a FSDP module. For instance, when we want to call only
     the `LlamaModel` part out of a FSDP-wrapped `LlamaForCausalLM` to get the hidden states without involving
     GPU-memory-heavy `lm_head` and cross entropy computation, doing this directly (i.e. `model.model.forward()`)
-    will not work because the first `nn.Emebedding` layer is not independently wrapped as a FSDP module (because of
+    will not work because the first `nn.Embedding` layer is not independently wrapped as a FSDP module (because of
     the transformer-based wrapping policy), and not calling it through FSDP root module forward will not all-gather
     its parameter, thus resulting in "RuntimeError: 'weight' must be 2-D" error. Similarly, if we want to call just
     the `lm_head` part of a model, we need this trick too to properly get its params all-gathered.
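
For readers unfamiliar with the docstring touched in the second hunk: it describes a forward-redirection trick for calling a submodule of an FSDP-wrapped model. Below is a minimal sketch of that idea, assuming an FSDP root `wrapper_module` that wraps `original_module`; the class name and simplified structure are illustrative assumptions, not the exact code in `orpo_trainer.py`.

```python
# Illustrative sketch only, not part of this patch. It approximates the
# forward-redirection idea the orpo_trainer.py docstring describes; the
# names below are assumptions, not the library's exact API.
import torch.nn as nn


class ForwardRedirection:
    """Call a submodule's method through the FSDP root module's forward()
    so that FSDP's pre-/post-forward hooks run and sharded parameters are
    all-gathered before the submodule touches them."""

    def __call__(self, wrapper_module: nn.Module, original_module: nn.Module, method, *args, **kwargs):
        # `wrapper_module` is the FSDP root; `original_module` is the module it wraps.
        original_forward = original_module.forward

        def wrapped_forward(*_args, **_kwargs):
            # Restore the real forward before delegating, so nested calls are unaffected.
            original_module.forward = original_forward
            return method(*_args, **_kwargs)

        # Temporarily redirect the wrapped module's forward, then go through the
        # FSDP root so its hooks fire; the root's forward ends up calling `method`.
        original_module.forward = wrapped_forward
        return wrapper_module(*args, **kwargs)
```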