Commit ec82c05

apply pre-commit and add missing close-paren to mamba config (#1270)
Quentin-Anthony authored Sep 8, 2024
1 parent 0d4bdb9 commit ec82c05
Showing 7 changed files with 38 additions and 17 deletions.
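
Most of the changes below are mechanical reformatting produced by the repository's pre-commit hooks; the one functional fix is the closing brace added to configs/mamba/mamba-130M.yml. As a hedged illustration (the exact hook configuration is assumed, not shown in this commit), black's Python API reproduces the wrapped-assert style that appears in the added lines throughout the diff:

    # Hedged sketch: reproduce the pre-commit style rewrite with black's API.
    # Assumes `pip install black`; the hook versions actually used are not shown here.
    import black

    src = (
        "assert neox_args.intermediate_size == None or neox_args.expansion_factor == None, "
        '"Must pass either the absolute intermediate size or the relative expansion '
        'factor for the mamba projections"\n'
    )
    # Prints the same three-line `assert ( ... ), "..."` form seen in the '+' lines below.
    print(black.format_str(src, mode=black.Mode(line_length=88)))
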
1 change: 1 addition & 0 deletions configs/mamba/mamba-130M.yml
@@ -86,3 +86,4 @@
 "steps_per_print": 10,
 "keep_last_n_checkpoints": 4,
 "wall_clock_breakdown": true,
+}
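
This closing brace is the "missing close-paren" from the commit title; without it the brace-delimited config cannot be parsed. A minimal sketch of the effect, assuming the file is loaded with PyYAML as NeoX-style configs usually are (values copied from the hunk above):

    # Minimal sketch (not part of the commit): with the trailing "}" restored,
    # the brace-delimited mapping parses; without it PyYAML raises a parse error.
    import yaml

    snippet = """{
      "steps_per_print": 10,
      "keep_last_n_checkpoints": 4,
      "wall_clock_breakdown": true,
    }"""
    print(yaml.safe_load(snippet))
    # {'steps_per_print': 10, 'keep_last_n_checkpoints': 4, 'wall_clock_breakdown': True}
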
12 changes: 6 additions & 6 deletions megatron/data/helpers.cpp
@@ -428,9 +428,9 @@ py::array build_mapping_impl(const py::array_t<int64_t>& docs_,
 }

 } // for (auto sent_index=sent_index_first; ...
-} // if (num_remain_sent > 1) {
-} // for (int doc=0; doc < num_docs; ++doc) {
-} // for (int epoch=0; epoch < num_epochs; ++epoch) {
+} // if (num_remain_sent > 1) {
+} // for (int doc=0; doc < num_docs; ++doc) {
+} // for (int epoch=0; epoch < num_epochs; ++epoch) {

 if (!second) {
 if (verbose) {
@@ -660,9 +660,9 @@ py::array build_blocks_mapping_impl(const py::array_t<int64_t>& docs_,
 num_sent = 0;
 }
 } // for (auto sent_index=sent_index_first; ...
-} // if (num_remain_sent > 1) {
-} // for (int doc=0; doc < num_docs; ++doc) {
-} // for (int epoch=0; epoch < num_epochs; ++epoch) {
+} // if (num_remain_sent > 1) {
+} // for (int doc=0; doc < num_docs; ++doc) {
+} // for (int epoch=0; epoch < num_epochs; ++epoch) {

 if (!second) {
 if (verbose) {
14 changes: 10 additions & 4 deletions megatron/model/mamba/mamba.py
@@ -13,8 +13,10 @@
     from causal_conv1d import causal_conv1d_fn
     import einops
 except ModuleNotFoundError:
-    print( "Unable to import Mamba kernels. Install them from our requirements/requirements-mamba.txt, \
-    or directly from https://github.com/state-spaces/mamba")
+    print(
+        "Unable to import Mamba kernels. Install them from our requirements/requirements-mamba.txt, \
+    or directly from https://github.com/state-spaces/mamba"
+    )
     pass

 from megatron.model.norms import get_norm
@@ -44,7 +46,9 @@ def __init__(
             neox_args.mamba_use_bias_in_linears and neox_args.mamba_inner_func_fusion
         ), "Mamba fused inner fn and bias in x_proj not compatible!"

-        assert neox_args.intermediate_size == None or neox_args.expansion_factor == None, "Must pass either the absolute intermediate size or the relative expansion factor for the mamba projections"
+        assert (
+            neox_args.intermediate_size == None or neox_args.expansion_factor == None
+        ), "Must pass either the absolute intermediate size or the relative expansion factor for the mamba projections"

         # set variables, mostly following mamba defaults
         self.d_model = neox_args.hidden_size
@@ -53,7 +57,9 @@ def __init__(
         if neox_args.intermediate_size:
             self.d_inner = neox_args.intermediate_size
         else:
-            self.expand = neox_args.expansion_factor if neox_args.expansion_factor else 2
+            self.expand = (
+                neox_args.expansion_factor if neox_args.expansion_factor else 2
+            )
             self.d_inner = int(self.expand * self.d_model)
         self.dt_rank = math.ceil(self.d_model / 16) # rank of dt / Delta parameter
         self.dt_scale = 1.0
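
The reformatted assert above (and its twins in rwkv.py and transformer.py below) enforces that intermediate_size and expansion_factor are mutually exclusive; when only the factor is given, the inner width is derived from the hidden size. A standalone sketch of that sizing rule with illustrative numbers (not a NeoX API; names mirror the diff):

    # Sketch of the sizing rule in the hunk above; values are illustrative.
    def mamba_d_inner(hidden_size, intermediate_size=None, expansion_factor=None):
        assert (
            intermediate_size is None or expansion_factor is None
        ), "Must pass either the absolute intermediate size or the relative expansion factor"
        if intermediate_size:
            return intermediate_size
        expand = expansion_factor if expansion_factor else 2  # mamba default
        return int(expand * hidden_size)

    print(mamba_d_inner(768))                          # 1536 (default 2x expansion)
    print(mamba_d_inner(768, intermediate_size=2048))  # 2048 (absolute size wins)
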
8 changes: 6 additions & 2 deletions megatron/model/rwkv/v6/rwkv.py
@@ -275,13 +275,17 @@ def __init__(self, neox_args, layer_number):
         self.layer_number = layer_number
         self.fp16 = neox_args.precision == "fp16"
         self.bf16 = neox_args.precision == "bfloat16"
-        assert neox_args.intermediate_size == None or neox_args.expansion_factor == None, "Must pass either the absolute intermediate size or the relative expansion factor for the mamba projections"
+        assert (
+            neox_args.intermediate_size == None or neox_args.expansion_factor == None
+        ), "Must pass either the absolute intermediate size or the relative expansion factor for the mamba projections"
         if not hasattr(neox_args, "dim_att"):
             neox_args.dim_att = neox_args.hidden_size
         if neox_args.intermediate_size:
             neox_args.ffn_dim = neox_args.intermediate_size
         else:
-            self.expand = neox_args.expansion_factor if neox_args.expansion_factor else 3.5
+            self.expand = (
+                neox_args.expansion_factor if neox_args.expansion_factor else 3.5
+            )
             neox_args.ffn_dim = int(self.expand * neox_args.hidden_size)
         # Make hidden size 3.5x by default. Round to nearest multiple of 32 until we add hdim rounding logic
         neox_args.ffn_dim = int(neox_args.ffn_dim // 32 * 32)
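
As the in-code comment says, RWKV defaults to a 3.5x expansion and then floors the FFN width to a multiple of 32. With an illustrative hidden size (not taken from any config in this commit):

    # Sketch of the rounding above: 3.5x expansion, floored to a multiple of 32.
    hidden_size = 1000                 # illustrative, not 32-aligned after expansion
    ffn_dim = int(3.5 * hidden_size)   # 3500
    ffn_dim = int(ffn_dim // 32 * 32)  # 3488  (109 * 32)
    print(ffn_dim)
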
10 changes: 8 additions & 2 deletions megatron/model/transformer.py
@@ -98,7 +98,9 @@ def __init__(
         MoE_mp_size=1,
     ):
         super().__init__()
-        assert neox_args.intermediate_size == None or neox_args.expansion_factor == None, "Must pass either the absolute intermediate size or the relative expansion factor for the mamba projections"
+        assert (
+            neox_args.intermediate_size == None or neox_args.expansion_factor == None
+        ), "Must pass either the absolute intermediate size or the relative expansion factor for the mamba projections"

         self.activation_func, self.is_gated = get_activation(neox_args)
         self.activation_type = neox_args.activation
@@ -1230,7 +1232,11 @@ def forward(self, x, attention_mask, layer_past=None):
                 raise KeyError(self.moe_type)

             with torch.enable_grad():
-                if self.activation == "swiglu" or self.num_experts > 1 and self.moe_type == "deepspeed":
+                if (
+                    self.activation == "swiglu"
+                    or self.num_experts > 1
+                    and self.moe_type == "deepspeed"
+                ):
                     # No dropout either
                     assert mlp_bias is None
                     output = mlp_output + attention_output
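
The wrapped condition keeps its original grouping: Python's and binds tighter than or, so the branch fires for swiglu, or for num_experts > 1 combined with the deepspeed MoE type. A quick standalone check with made-up values:

    # `and` binds tighter than `or`, so the reformatted condition still groups as
    # swiglu OR (num_experts > 1 AND moe_type == "deepspeed").
    activation, num_experts, moe_type = "gelu", 4, "megablocks"

    flat = activation == "swiglu" or num_experts > 1 and moe_type == "deepspeed"
    grouped = activation == "swiglu" or (num_experts > 1 and moe_type == "deepspeed")
    print(flat, grouped)  # False False -- same result, so the change is formatting-only
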
4 changes: 3 additions & 1 deletion megatron/neox_arguments/arguments.py
@@ -1187,7 +1187,9 @@ def validate_values(self):
             return False

         # Checks.
-        if self.hidden_size % self.num_attention_heads != 0 and not ("mamba" in self.attention_config):
+        if self.hidden_size % self.num_attention_heads != 0 and not (
+            "mamba" in self.attention_config
+        ):
             error_message = (
                 self.__class__.__name__
                 + ".validate_values() hidden_size must be divisible by num_attention_heads"
6 changes: 4 additions & 2 deletions megatron/tokenizer/tokenizer.py
@@ -31,8 +31,10 @@ def build_tokenizer(args):
     """Initialize tokenizer."""
     if args.rank == 0:
         print("> building {} tokenizer ...".format(args.tokenizer_type), flush=True)

-    assert args.tokenizer_type is not None, "tokenizer_type must be specified in the .yml config"
+    assert (
+        args.tokenizer_type is not None
+    ), "tokenizer_type must be specified in the .yml config"

     # Select and instantiate the tokenizer.
     if args.tokenizer_type.lower() == "GPT2BPETokenizer".lower():
