
Commit

Merge pull request #142 from macrocosm-os/dev
Release 2.6.0
RusticLuftig authored Dec 5, 2024
2 parents 1237391 + 67bde1f commit c6dce9d
Showing 22 changed files with 440 additions and 258 deletions.
2 changes: 2 additions & 0 deletions competitions/data.py
@@ -11,6 +11,8 @@ class CompetitionId(IntEnum):

B7_MULTI_CHOICE = 2

INSTRUCT_8B = 3

# Overwrite the default __repr__, which doesn't work with
# bt.logging for some unknown reason.
def __repr__(self) -> str:
102 changes: 88 additions & 14 deletions constants/__init__.py
@@ -15,11 +15,14 @@
from transformers import (
BartForCausalLM,
FalconForCausalLM,
Gemma2ForCausalLM,
GemmaForCausalLM,
GPTNeoXForCausalLM,
LlamaForCausalLM,
MistralForCausalLM,
Phi3ForCausalLM,
PhiForCausalLM,
Qwen2ForCausalLM,
)

from competitions.data import CompetitionId
@@ -30,7 +33,7 @@
# Project Constants.
# ---------------------------------

__version__ = "2.5.1"
__version__ = "2.6.0"
version_split = __version__.split(".")
__spec_version__ = (
(1000 * int(version_split[0]))
@@ -45,16 +48,16 @@
# Block the subnet was registered.
GENESIS_BLOCK = 3138611
# Define the number of blocks per vali "sync". This cadence is used to align validator behavior for better vtrust.
SYNC_BLOCK_CADENCE = 180
SYNC_BLOCK_CADENCE = 270
# Rough estimate of the number of seconds per block.
SECONDS_PER_BLOCK = 12
# Validator weight moving average term.
# At 0.9 a model will go from 0 -> 0.190 in 2 cycles and from 0 -> 0.83 in 17 cycles.
ALPHA = 0.9
# At 0.85 a model will go from 0 -> 0.278 in 2 cycles and from 0 -> 0.833 in 11 cycles.
ALPHA = 0.85
# Any miners with a combined competition weight below this threshold will instead receive 0 weight.
# This is intended to help vtrust in conjunction with a low alpha by handling the tail ends.
# At 1 eval per 180 blocks, newly winning models will start receiving weight after ~360 blocks.
# Previously winning models will phase out after ~3060 blocks, at which point only the new winner will have weight.
# At 1 eval per 270 blocks, newly winning models will start receiving weight after ~540 blocks.
# Previously winning models will phase out after ~2970 blocks, at which point only the new winner will have weight.
MIN_WEIGHT_THRESHOLD = 0.18
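# Illustrative arithmetic for the figures above (a sketch; it assumes the weight update
# is a plain exponential moving average): a new winner's weight approaches 1 - ALPHA**n
# after n eval cycles, while an old winner's weight decays as ALPHA**n. With ALPHA = 0.85:
#   1 - 0.85**2 ≈ 0.278 -> first above the 0.18 threshold after 2 cycles (~540 blocks).
#   0.85**11 ≈ 0.167 -> first below the threshold after 11 cycles (~2970 blocks).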

# The validator WANDB project.
@@ -75,7 +78,7 @@
WEIGHT_SYNC_VALI_MIN_STAKE = 100_000
# Minimum percent of weight on a vali for a miner to be considered a top miner.
# Since there can be multiple competitions at different reward percentages we can't just check biggest.
WEIGHT_SYNC_MINER_MIN_PERCENT = 0.10
# Since we only set weights per competition with a threshold of 0.18 we can just take any percent here.
WEIGHT_SYNC_MINER_MIN_PERCENT = 0.01
# The root directory of this project.
ROOT_DIR = Path(__file__).parent.parent
# The maximum bytes for the hugging face repo.
@@ -98,7 +102,7 @@
kwargs={
"torch_dtype": torch.bfloat16,
},
eval_block_delay=1200, # ~4 hours.
eval_block_delay=1600, # ~5 hours.
norm_validation_constraints=NormValidationConstraints(
norm_eps_soft=200,
norm_eps_soft_percent_threshold=0.15,
@@ -107,10 +111,36 @@
epsilon_func=LinearDecay(0.05, 0.01, 7200 * 5), # Decay over ~5 days.
max_bytes=15 * 1024 * 1024 * 1024,
),
CompetitionId.INSTRUCT_8B: ModelConstraints(
max_model_parameter_size=8_100_000_000,
sequence_length=4096,
allowed_architectures=[
BartForCausalLM,
FalconForCausalLM,
Gemma2ForCausalLM,
GemmaForCausalLM,
GPTNeoXForCausalLM,
LlamaForCausalLM,
MistralForCausalLM,
Phi3ForCausalLM,
PhiForCausalLM,
],
tokenizer=None, # Any tokenizer can be used.
kwargs={
"torch_dtype": torch.bfloat16,
},
eval_block_delay=1600, # ~5 hours.
norm_validation_constraints=NormValidationConstraints(
norm_eps_soft=200,
norm_eps_soft_percent_threshold=0.15,
norm_eps_hard=1000,
),
epsilon_func=LinearDecay(0.05, 0.01, 7200 * 5), # Decay over ~5 days.
max_bytes=20 * (1024**3),
),
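# Rough sizing check (an estimate, not taken from the file): 8.1e9 parameters stored in
# bfloat16 is ~16.2 GB of weights, so the 20 GiB repo cap leaves a few GB of headroom
# for tokenizer and config files.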
}

# Block to start including fineweb data.
IF_EVAL_BLOCK = 4_344_030
INSTRUCT_8B_BLOCK = 4_451_695

# Schedule of competitions by block.
COMPETITION_SCHEDULE_BY_BLOCK: List[Tuple[int, List[Competition]]] = [
@@ -127,7 +157,7 @@
method_id=EvalMethodId.MULTIPLE_CHOICE,
dataset_id=DatasetId.SYNTHETIC_MMLU,
normalization_id=NormalizationId.NONE,
weight=0.9,
weight=0.85,
),
EvalTask(
name="WORD_SORTING",
@@ -145,24 +175,31 @@
normalization_kwargs={"ceiling": 20.0},
weight=0.05,
),
EvalTask(
name="IF_EVAL_V1",
method_id=EvalMethodId.IF_EVAL,
dataset_id=DatasetId.SYNTHETIC_IF_EVAL,
normalization_id=NormalizationId.NONE,
weight=0.05,
),
],
),
],
),
(
IF_EVAL_BLOCK,
INSTRUCT_8B_BLOCK,
[
Competition(
CompetitionId.B7_MULTI_CHOICE,
MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.B7_MULTI_CHOICE],
1.0,
0.9,
eval_tasks=[
EvalTask(
name="SYNTHETIC_MMLU",
method_id=EvalMethodId.MULTIPLE_CHOICE,
dataset_id=DatasetId.SYNTHETIC_MMLU,
normalization_id=NormalizationId.NONE,
weight=0.85,
weight=0.8,
),
EvalTask(
name="WORD_SORTING",
@@ -178,8 +215,45 @@
dataset_id=DatasetId.FINEWEB,
normalization_id=NormalizationId.INVERSE_EXPONENTIAL,
normalization_kwargs={"ceiling": 20.0},
weight=0.1,
),
EvalTask(
name="IF_EVAL_V1",
method_id=EvalMethodId.IF_EVAL,
dataset_id=DatasetId.SYNTHETIC_IF_EVAL,
normalization_id=NormalizationId.NONE,
weight=0.05,
),
],
),
Competition(
CompetitionId.INSTRUCT_8B,
MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.INSTRUCT_8B],
0.1,
eval_tasks=[
EvalTask(
name="SYNTHETIC_MMLU",
method_id=EvalMethodId.MULTIPLE_CHOICE,
dataset_id=DatasetId.SYNTHETIC_MMLU,
normalization_id=NormalizationId.NONE,
weight=0.8,
),
EvalTask(
name="WORD_SORTING",
method_id=EvalMethodId.REFERENCE_LOSS,
dataset_id=DatasetId.WORD_SORTING,
normalization_id=NormalizationId.INVERSE_EXPONENTIAL,
normalization_kwargs={"ceiling": 40.0},
weight=0.05,
),
EvalTask(
name="FINEWEB",
method_id=EvalMethodId.TEXT_LOSS,
dataset_id=DatasetId.FINEWEB,
normalization_id=NormalizationId.INVERSE_EXPONENTIAL,
normalization_kwargs={"ceiling": 20.0},
weight=0.1,
),
EvalTask(
name="IF_EVAL_V1",
method_id=EvalMethodId.IF_EVAL,
14 changes: 14 additions & 0 deletions docs/competitions.md
@@ -18,6 +18,20 @@ Models submitted to this competition are evaluated on a set of evaluation tasks,

[Code Link](https://github.com/macrocosm-os/finetuning/blob/94e8fd92ab4158e1e4a425a9562695eebafa27b1/constants/__init__.py#L128)

## Competition INSTRUCT_8B:

### Goal

The goal of this competition is to train a SOTA instruct 8B model. This competition provides more freedom to miners than other competitions: there are no restrictions on the tokenizer used and miners are allowed to use a wider range of architectures.

### Evaluation

The evaluation tasks are the same as for the B7_MULTI_CHOICE competition.

### Definitions

TODO: Fill in post check-in

# Deprecated Competitions

## Competition 1: SN9_MODEL
6 changes: 3 additions & 3 deletions docs/examples.ipynb
@@ -92,8 +92,8 @@
")\n",
"\n",
"# Move the model to the appropriate device and set to eval mode.\n",
"model.to(device)\n",
"model.eval()\n",
"model.pt_model.to(device)\n",
"model.pt_model.eval()\n",
"\n",
"# Load the competition so we can load the right tokenizer.\n",
"metagraph = bt.metagraph(constants.SUBNET_UID)\n",
@@ -134,7 +134,7 @@
" pad_token_id=tokenizer.eos_token_id,\n",
")\n",
"response = ft.eval.method.generate_output(\n",
" model=model,\n",
" model=model.pt_model,\n",
" input_ids=input_ids,\n",
" generation_config=generation_config,\n",
" device=device,\n",
10 changes: 6 additions & 4 deletions finetune/datasets/factory.py
@@ -1,22 +1,24 @@
from typing import Any, Dict, Set

from finetune.datasets.generated.dyck_loader import DyckLoader
from finetune.datasets.generated.if_eval_loader import IFEvalLoader
from finetune.datasets.generated.word_sorting_loader import WordSortingLoader
from finetune.datasets.hugging_face.hugging_face_loader import (
HuggingFaceLoader,
FINEWEB_EDU_SCORE_2_NAME,
HuggingFaceLoader,
)
from finetune.datasets.ids import DatasetId
from typing import Dict, Any, Set
from finetune.datasets.loader import DatasetLoader


class DatasetLoader:
class DatasetLoaderFactory:
@staticmethod
def get_loader(
dataset_id: DatasetId,
dataset_kwargs: Dict[str, Any],
seed: int,
validator_hotkeys: Set[str],
) -> "DatasetLoader":
) -> DatasetLoader:
"""Loads data samples from the appropriate dataset."""

match dataset_id:
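For orientation, a minimal usage sketch of the renamed factory follows; the dataset kwargs, seed, and tokenizer below are illustrative placeholders rather than values taken from this commit:

from transformers import AutoTokenizer

from finetune.datasets.factory import DatasetLoaderFactory
from finetune.datasets.ids import DatasetId

# Build the loader for the word-sorting eval task (placeholder kwargs and seed).
loader = DatasetLoaderFactory.get_loader(
    dataset_id=DatasetId.WORD_SORTING,
    dataset_kwargs={},
    seed=1234,
    validator_hotkeys=set(),
)

# Every loader now implements the shared DatasetLoader interface (see loader.py below),
# so tokenized samples are produced the same way regardless of the dataset.
tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder tokenizer
samples = loader.tokenize(tokenizer, sequence_length=4096)
print(len(loader), "raw samples,", len(samples), "tokenized samples")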
4 changes: 3 additions & 1 deletion finetune/datasets/generated/dyck_loader.py
@@ -19,6 +19,8 @@
import torch
from transformers import PreTrainedTokenizerBase

from finetune.datasets.loader import DatasetLoader

# Characters to use in the dycks.
DYCK_CHARACTER_PAIRS = [("<", ">"), ("[", "]"), ("{", "}"), ("(", ")")]
DYCK_ENDING_CHARS = [x[1] for x in DYCK_CHARACTER_PAIRS]
@@ -69,7 +71,7 @@ def generate_dyck(
return dyck_word


class DyckLoader:
class DyckLoader(DatasetLoader):
def __init__(
self,
dyck_character_pairs: typing.List[
3 changes: 2 additions & 1 deletion finetune/datasets/generated/if_eval_loader.py
@@ -8,12 +8,13 @@
from transformers import PreTrainedTokenizerBase

from finetune.datasets.generated.mmlu_parser import extract_q_and_a_text
from finetune.datasets.loader import DatasetLoader
from finetune.datasets.subnet.prompting_subset_loader import PromptingSubsetLoader
from finetune.eval.if_eval import rule_factory
from finetune.eval.if_eval.sample import IFEvalTokenizedSample


class IFEvalLoader:
class IFEvalLoader(DatasetLoader):
"""Generates samples for the IfEval task."""

# The min/max number of rules per sample.
4 changes: 3 additions & 1 deletion finetune/datasets/generated/word_sorting_loader.py
@@ -20,6 +20,8 @@
import torch
from transformers import PreTrainedTokenizerBase

from finetune.datasets.loader import DatasetLoader

try:
from nltk.corpus import words
except:
@@ -28,7 +30,7 @@
WORD_SORTING_CHALLENGE_PROMPT = "Sort the following words alphabetically: "


class WordSortingLoader:
class WordSortingLoader(DatasetLoader):
def __init__(
self,
min_word_count: int = 2,
4 changes: 3 additions & 1 deletion finetune/datasets/hugging_face/hugging_face_loader.py
@@ -22,12 +22,14 @@
import bittensor as bt
from transformers import PreTrainedTokenizerBase

from finetune.datasets.loader import DatasetLoader


FINEWEB_EDU_SCORE_2_NAME = "HuggingFaceFW/fineweb-edu-score-2"
FALCON_NAME = "tiiuae/falcon-refinedweb"


class HuggingFaceLoader:
class HuggingFaceLoader(DatasetLoader):
rows_base_url: str = "https://datasets-server.huggingface.co/rows"
size_base_url: str = "https://datasets-server.huggingface.co/size"

24 changes: 24 additions & 0 deletions finetune/datasets/loader.py
@@ -0,0 +1,24 @@
import abc
from typing import List

from transformers import PreTrainedTokenizerBase

from finetune.eval.sample import EvalSample


class DatasetLoader(abc.ABC):
"""Base class for dataset loaders."""

@abc.abstractmethod
def tokenize(
self, tokenizer: PreTrainedTokenizerBase, sequence_length: int
) -> List[EvalSample]:
pass

@abc.abstractmethod
def __iter__(self):
pass

@abc.abstractmethod
def __len__(self):
pass
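
The loaders elsewhere in this commit (DyckLoader, IFEvalLoader, WordSortingLoader, HuggingFaceLoader, PromptingSubsetLoader) now subclass this interface. A minimal toy implementation might look like the sketch below; since EvalSample is not shown in this diff, the toy tokenize returns raw token-id tensors instead:

from transformers import PreTrainedTokenizerBase

from finetune.datasets.loader import DatasetLoader


class ToyLoader(DatasetLoader):
    """Toy loader serving a fixed list of prompt strings (illustration only)."""

    def __init__(self):
        self.samples = ["Sort the following words alphabetically: pear apple mango"]

    def tokenize(self, tokenizer: PreTrainedTokenizerBase, sequence_length: int):
        # Truncate each prompt to the competition's sequence length.
        return [
            tokenizer(
                text, truncation=True, max_length=sequence_length, return_tensors="pt"
            ).input_ids
            for text in self.samples
        ]

    def __iter__(self):
        return iter(self.samples)

    def __len__(self):
        return len(self.samples)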
3 changes: 2 additions & 1 deletion finetune/datasets/subnet/prompting_subset_loader.py
@@ -27,6 +27,7 @@
from transformers import PreTrainedTokenizerBase

import constants
from finetune.datasets.loader import DatasetLoader
from finetune.datasets.subnet.history_scan import SampledHistoryScan

# Multiple choice answers for the prompting subnet.
@@ -36,7 +37,7 @@
EARLIEST_DATE = dt.datetime(2024, 8, 29, tzinfo=dt.timezone.utc)


class PromptingSubsetLoader:
class PromptingSubsetLoader(DatasetLoader):
@staticmethod
def _get_filters(
validator_hotkeys: typing.List[str],