From 83660e912ad2622d3bc67ccdf7291242d2bd8a3c Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Fri, 29 Nov 2024 14:08:59 -0800 Subject: [PATCH 01/26] Use per-model tokenizers for eval --- finetune/datasets/factory.py | 10 ++-- finetune/datasets/generated/dyck_loader.py | 4 +- finetune/datasets/generated/if_eval_loader.py | 3 +- .../datasets/generated/word_sorting_loader.py | 4 +- finetune/datasets/loader.py | 24 +++++++++ .../subnet/prompting_subset_loader.py | 3 +- finetune/validation.py | 6 ++- neurons/validator.py | 50 ++++++++++++++----- 8 files changed, 81 insertions(+), 23 deletions(-) create mode 100644 finetune/datasets/loader.py diff --git a/finetune/datasets/factory.py b/finetune/datasets/factory.py index d257d18..ab91f73 100644 --- a/finetune/datasets/factory.py +++ b/finetune/datasets/factory.py @@ -1,22 +1,24 @@ +from typing import Any, Dict, Set + from finetune.datasets.generated.dyck_loader import DyckLoader from finetune.datasets.generated.if_eval_loader import IFEvalLoader from finetune.datasets.generated.word_sorting_loader import WordSortingLoader from finetune.datasets.hugging_face.hugging_face_loader import ( - HuggingFaceLoader, FINEWEB_EDU_SCORE_2_NAME, + HuggingFaceLoader, ) from finetune.datasets.ids import DatasetId -from typing import Dict, Any, Set +from finetune.datasets.loader import DatasetLoader -class DatasetLoader: +class DatasetLoaderFactory: @staticmethod def get_loader( dataset_id: DatasetId, dataset_kwargs: Dict[str, Any], seed: int, validator_hotkeys: Set[str], - ) -> "DatasetLoader": + ) -> DatasetLoader: """Loads data samples from the appropriate dataset.""" match dataset_id: diff --git a/finetune/datasets/generated/dyck_loader.py b/finetune/datasets/generated/dyck_loader.py index da4272d..ba52b3f 100644 --- a/finetune/datasets/generated/dyck_loader.py +++ b/finetune/datasets/generated/dyck_loader.py @@ -19,6 +19,8 @@ import torch from transformers import PreTrainedTokenizerBase +from finetune.datasets.loader import DatasetLoader + # Characters to use in the dycks. DYCK_CHARACTER_PAIRS = [("<", ">"), ("[", "]"), ("{", "}"), ("(", ")")] DYCK_ENDING_CHARS = [x[1] for x in DYCK_CHARACTER_PAIRS] @@ -69,7 +71,7 @@ def generate_dyck( return dyck_word -class DyckLoader: +class DyckLoader(DatasetLoader): def __init__( self, dyck_character_pairs: typing.List[ diff --git a/finetune/datasets/generated/if_eval_loader.py b/finetune/datasets/generated/if_eval_loader.py index 225abdb..4ae514a 100644 --- a/finetune/datasets/generated/if_eval_loader.py +++ b/finetune/datasets/generated/if_eval_loader.py @@ -8,12 +8,13 @@ from transformers import PreTrainedTokenizerBase from finetune.datasets.generated.mmlu_parser import extract_q_and_a_text +from finetune.datasets.loader import DatasetLoader from finetune.datasets.subnet.prompting_subset_loader import PromptingSubsetLoader from finetune.eval.if_eval import rule_factory from finetune.eval.if_eval.sample import IFEvalTokenizedSample -class IFEvalLoader: +class IFEvalLoader(DatasetLoader): """Generates samples for the IfEval task.""" # The min/max number of rules per sample. 
diff --git a/finetune/datasets/generated/word_sorting_loader.py b/finetune/datasets/generated/word_sorting_loader.py index 9707404..575bc22 100644 --- a/finetune/datasets/generated/word_sorting_loader.py +++ b/finetune/datasets/generated/word_sorting_loader.py @@ -20,6 +20,8 @@ import torch from transformers import PreTrainedTokenizerBase +from finetune.datasets.loader import DatasetLoader + try: from nltk.corpus import words except: @@ -28,7 +30,7 @@ WORD_SORTING_CHALLENGE_PROMPT = "Sort the following words alphabetically: " -class WordSortingLoader: +class WordSortingLoader(DatasetLoader): def __init__( self, min_word_count: int = 2, diff --git a/finetune/datasets/loader.py b/finetune/datasets/loader.py new file mode 100644 index 0000000..115fd98 --- /dev/null +++ b/finetune/datasets/loader.py @@ -0,0 +1,24 @@ +import abc +from typing import List + +from transformers import PreTrainedTokenizerBase + +from finetune.eval.sample import EvalSample + + +class DatasetLoader(abc.ABC): + """Base class for dataset loaders.""" + + @abc.abstractmethod + def tokenize( + self, tokenizer: PreTrainedTokenizerBase, sequence_length: int + ) -> List[EvalSample]: + pass + + @abc.abstractmethod + def __iter__(self): + pass + + @abc.abstractmethod + def __len__(self): + pass diff --git a/finetune/datasets/subnet/prompting_subset_loader.py b/finetune/datasets/subnet/prompting_subset_loader.py index e9a7f40..0fe2360 100644 --- a/finetune/datasets/subnet/prompting_subset_loader.py +++ b/finetune/datasets/subnet/prompting_subset_loader.py @@ -27,6 +27,7 @@ from transformers import PreTrainedTokenizerBase import constants +from finetune.datasets.loader import DatasetLoader from finetune.datasets.subnet.history_scan import SampledHistoryScan # Multiple choice answers for the prompting subnet. @@ -36,7 +37,7 @@ EARLIEST_DATE = dt.datetime(2024, 8, 29, tzinfo=dt.timezone.utc) -class PromptingSubsetLoader: +class PromptingSubsetLoader(DatasetLoader): @staticmethod def _get_filters( validator_hotkeys: typing.List[str], diff --git a/finetune/validation.py b/finetune/validation.py index 19bd835..7144921 100644 --- a/finetune/validation.py +++ b/finetune/validation.py @@ -135,7 +135,6 @@ class ScoreDetails: def score_model( model, - tokenizer: transformers.PreTrainedTokenizer, evals: typing.List[EvalTask], samples: typing.List[typing.List[EvalSample]], competition: Competition, @@ -145,7 +144,6 @@ def score_model( Args: model (torch.nn.Module): The model to score. - tokenizer (transformers.PreTrainedTokenizer): The tokenizer to use for tokenization. evals (list): A list of EvalTasks to score the model on. samples (list): A list of samples to use for scoring for the eval tasks. Must be the same length as evals. competition (Competition): The competition to score the model for. 
@@ -157,12 +155,16 @@ def score_model( if len(evals) != len(samples): raise ValueError("Number of eval tasks and samples must match.") + if not model.tokenizer: + raise ValueError("Model does not have a tokenizer") + with torch.inference_mode(): model.to(device) model.eval() score = 0 score_details = {task.name: ScoreDetails() for task in evals} + tokenizer = model.tokenizer for task, samples in zip(evals, samples): bt.logging.trace(f"Scoring model on task: {task.name}") diff --git a/neurons/validator.py b/neurons/validator.py index bba3884..194465c 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -23,8 +23,9 @@ from retry import retry from taoverse.model.eval.task import EvalTask -from finetune.datasets.factory import DatasetLoader +from finetune.datasets.factory import DatasetLoaderFactory from finetune.datasets.ids import DatasetId +from finetune.datasets.loader import DatasetLoader from finetune.eval.sample import EvalSample from finetune.validation import ScoreDetails @@ -945,13 +946,17 @@ async def run_step(self): # Pull the latest sample data based on the competition. load_data_perf = PerfMonitor("Eval: Load data") - # Tokenize the data into batches for use in evaluation. - # If custom tokenizers are allowed this will need to be done on a per uid basis instead. - tokenizer = ft.model.load_tokenizer( - competition.constraints, cache_dir=self.config.model_dir - ) + + use_default_tokenizer = False + if competition.constraints.tokenizer: + tokenizer = ft.model.load_tokenizer( + competition.constraints, cache_dir=self.config.model_dir + ) + use_default_tokenizer = True + seed = self._get_seed(sync_block) eval_tasks: typing.List[EvalTask] = [] + data_loaders: typing.List[DatasetLoader] = [] samples: typing.List[typing.List[EvalSample]] = [] # Load data based on the competition. @@ -973,7 +978,7 @@ async def run_step(self): vali_hotkeys, ) else: - data_loader = DatasetLoader.get_loader( + data_loader = DatasetLoaderFactory.get_loader( dataset_id=eval_task.dataset_id, dataset_kwargs=eval_task.dataset_kwargs, seed=seed, @@ -982,11 +987,14 @@ async def run_step(self): if data_loader: eval_tasks.append(eval_task) - samples.append( - data_loader.tokenize( - tokenizer, competition.constraints.sequence_length + data_loaders.append(data_loader) + if use_default_tokenizer: + assert tokenizer is not None + samples.append( + data_loader.tokenize( + tokenizer, competition.constraints.sequence_length + ) ) - ) # Compute model score on batches. bt.logging.debug( @@ -1035,13 +1043,29 @@ async def run_step(self): hotkey, model_i_metadata.id, kwargs ) + # If the competition defines a default tokenizer, set it here. + if use_default_tokenizer: + model_i.tokenizer = tokenizer + else: + if not model_i.tokenizer: + raise ValueError( + f"Model {uid_i} does not have a tokenizer." + ) + + samples = [ + loader.tokenize( + model_i.tokenizer, + competition.constraints.sequence_length, + ) + for loader in data_loaders + ] + with compute_score_perf.sample(): # Run each computation in a subprocess so that the GPU is reset between each model. 
score, score_details = utils.run_in_subprocess( functools.partial( ft.validation.score_model, - model_i.pt_model, - tokenizer, + model_i, eval_tasks, samples, competition, From 897abcc4fabea13fd146df9afbdcb980a30f7322 Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Fri, 29 Nov 2024 14:22:00 -0800 Subject: [PATCH 02/26] Minor clean up --- finetune/datasets/hugging_face/hugging_face_loader.py | 4 +++- neurons/validator.py | 2 +- tests/finetune/datasets/generated/test_if_eval_loader.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/finetune/datasets/hugging_face/hugging_face_loader.py b/finetune/datasets/hugging_face/hugging_face_loader.py index 88efcba..7b2c2ea 100644 --- a/finetune/datasets/hugging_face/hugging_face_loader.py +++ b/finetune/datasets/hugging_face/hugging_face_loader.py @@ -22,12 +22,14 @@ import bittensor as bt from transformers import PreTrainedTokenizerBase +from finetune.datasets.loader import DatasetLoader + FINEWEB_EDU_SCORE_2_NAME = "HuggingFaceFW/fineweb-edu-score-2" FALCON_NAME = "tiiuae/falcon-refinedweb" -class HuggingFaceLoader: +class HuggingFaceLoader(DatasetLoader): rows_base_url: str = "https://datasets-server.huggingface.co/rows" size_base_url: str = "https://datasets-server.huggingface.co/size" diff --git a/neurons/validator.py b/neurons/validator.py index 194465c..e9cbce0 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -989,7 +989,7 @@ async def run_step(self): eval_tasks.append(eval_task) data_loaders.append(data_loader) if use_default_tokenizer: - assert tokenizer is not None + assert tokenizer samples.append( data_loader.tokenize( tokenizer, competition.constraints.sequence_length diff --git a/tests/finetune/datasets/generated/test_if_eval_loader.py b/tests/finetune/datasets/generated/test_if_eval_loader.py index f28225c..d41b385 100644 --- a/tests/finetune/datasets/generated/test_if_eval_loader.py +++ b/tests/finetune/datasets/generated/test_if_eval_loader.py @@ -8,7 +8,7 @@ class TestIFEvalLoader(unittest.TestCase): def setUp(self): - self.loader = IFEvalLoader(random_seed=42, max_samples=100) + self.loader = IFEvalLoader(random_seed=420, max_samples=100) def test_uniform_distribution_of_rules(self): rule_counts = [len(sample.rules) for sample in self.loader] From 8f6f15540f8e439a3cf5caa3ba87657f3065317d Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Fri, 29 Nov 2024 15:03:23 -0800 Subject: [PATCH 03/26] Use taoverse 1.2.0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a612438..90da8a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,4 @@ safetensors torch==2.3.1 transformers==4.44.1 wandb==0.18.0 -taoverse==1.1.1 +taoverse==1.2.0 From 47c3ff443c62534f2737bb1cb794055f6cf46359 Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Fri, 29 Nov 2024 15:47:53 -0800 Subject: [PATCH 04/26] Define the 8B instruct competition --- competitions/data.py | 2 + constants/__init__.py | 89 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 83 insertions(+), 8 deletions(-) diff --git a/competitions/data.py b/competitions/data.py index 0a0afcc..d001a99 100644 --- a/competitions/data.py +++ b/competitions/data.py @@ -11,6 +11,8 @@ class CompetitionId(IntEnum): B7_MULTI_CHOICE = 2 + INSTRUCT_8B = 3 + # Overwrite the default __repr__, which doesn't work with # bt.logging for some unknown reason. 
def __repr__(self) -> str: diff --git a/constants/__init__.py b/constants/__init__.py index 9da1130..d25ac10 100644 --- a/constants/__init__.py +++ b/constants/__init__.py @@ -15,11 +15,14 @@ from transformers import ( BartForCausalLM, FalconForCausalLM, + Gemma2ForCausalLM, GemmaForCausalLM, GPTNeoXForCausalLM, LlamaForCausalLM, MistralForCausalLM, + Phi3ForCausalLM, PhiForCausalLM, + Qwen2ForCausalLM, ) from competitions.data import CompetitionId @@ -30,7 +33,7 @@ # Project Constants. # --------------------------------- -__version__ = "2.5.1" +__version__ = "2.5.2" version_split = __version__.split(".") __spec_version__ = ( (1000 * int(version_split[0])) @@ -107,10 +110,36 @@ epsilon_func=LinearDecay(0.05, 0.01, 7200 * 5), # Decay over ~5 days. max_bytes=15 * 1024 * 1024 * 1024, ), + CompetitionId.INSTRUCT_8B: ModelConstraints( + max_model_parameter_size=8_100_000_000, + sequence_length=4096, + allowed_architectures=[ + BartForCausalLM, + FalconForCausalLM, + Gemma2ForCausalLM, + GemmaForCausalLM, + GPTNeoXForCausalLM, + LlamaForCausalLM, + MistralForCausalLM, + Phi3ForCausalLM, + PhiForCausalLM, + Qwen2ForCausalLM, + ], + kwargs={ + "torch_dtype": torch.bfloat16, + }, + eval_block_delay=1200, # ~4 hours. + norm_validation_constraints=NormValidationConstraints( + norm_eps_soft=200, + norm_eps_soft_percent_threshold=0.15, + norm_eps_hard=1000, + ), + epsilon_func=LinearDecay(0.05, 0.01, 7200 * 5), # Decay over ~5 days. + max_bytes=20 * (1024**3), + ), } -# Block to start including fineweb data. -IF_EVAL_BLOCK = 4_344_030 +INSTRUCT_8B_BLOCK = 4_423_335 # Schedule of competitions by block. COMPETITION_SCHEDULE_BY_BLOCK: List[Tuple[int, List[Competition]]] = [ @@ -127,7 +156,7 @@ method_id=EvalMethodId.MULTIPLE_CHOICE, dataset_id=DatasetId.SYNTHETIC_MMLU, normalization_id=NormalizationId.NONE, - weight=0.9, + weight=0.85, ), EvalTask( name="WORD_SORTING", @@ -145,24 +174,31 @@ normalization_kwargs={"ceiling": 20.0}, weight=0.05, ), + EvalTask( + name="IF_EVAL_V1", + method_id=EvalMethodId.IF_EVAL, + dataset_id=DatasetId.SYNTHETIC_IF_EVAL, + normalization_id=NormalizationId.NONE, + weight=0.05, + ), ], ), ], ), ( - IF_EVAL_BLOCK, + INSTRUCT_8B_BLOCK, [ Competition( CompetitionId.B7_MULTI_CHOICE, MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.B7_MULTI_CHOICE], - 1.0, + 0.9, eval_tasks=[ EvalTask( name="SYNTHETIC_MMLU", method_id=EvalMethodId.MULTIPLE_CHOICE, dataset_id=DatasetId.SYNTHETIC_MMLU, normalization_id=NormalizationId.NONE, - weight=0.85, + weight=0.75, ), EvalTask( name="WORD_SORTING", @@ -178,15 +214,52 @@ dataset_id=DatasetId.FINEWEB, normalization_id=NormalizationId.INVERSE_EXPONENTIAL, normalization_kwargs={"ceiling": 20.0}, - weight=0.05, + weight=0.1, ), EvalTask( name="IF_EVAL_V1", method_id=EvalMethodId.IF_EVAL, dataset_id=DatasetId.SYNTHETIC_IF_EVAL, normalization_id=NormalizationId.NONE, + weight=0.1, + ), + ], + ), + Competition( + CompetitionId.INSTRUCT_8B, + MODEL_CONSTRAINTS_BY_COMPETITION_ID[CompetitionId.INSTRUCT_8B], + 0.1, + eval_tasks=[ + EvalTask( + name="SYNTHETIC_MMLU", + method_id=EvalMethodId.MULTIPLE_CHOICE, + dataset_id=DatasetId.SYNTHETIC_MMLU, + normalization_id=NormalizationId.NONE, + weight=0.75, + ), + EvalTask( + name="WORD_SORTING", + method_id=EvalMethodId.REFERENCE_LOSS, + dataset_id=DatasetId.WORD_SORTING, + normalization_id=NormalizationId.INVERSE_EXPONENTIAL, + normalization_kwargs={"ceiling": 40.0}, weight=0.05, ), + EvalTask( + name="FINEWEB", + method_id=EvalMethodId.TEXT_LOSS, + dataset_id=DatasetId.FINEWEB, + 
normalization_id=NormalizationId.INVERSE_EXPONENTIAL, + normalization_kwargs={"ceiling": 20.0}, + weight=0.1, + ), + EvalTask( + name="IF_EVAL_V1", + method_id=EvalMethodId.IF_EVAL, + dataset_id=DatasetId.SYNTHETIC_IF_EVAL, + normalization_id=NormalizationId.NONE, + weight=0.1, + ), ], ), ], From 66ca12ed92daa2442b7b8a30ef8b862467a2ed2c Mon Sep 17 00:00:00 2001 From: Sid Date: Fri, 29 Nov 2024 16:09:02 -0800 Subject: [PATCH 05/26] Update miner and helpers to support unlocked tokenizer. --- docs/examples.ipynb | 6 +-- finetune/mining.py | 73 +++++++++++++++++++++++++---------- neurons/miner.py | 7 +--- tests/finetune/test_mining.py | 73 +++++++++++++++++++++++++++++------ tests/utils.py | 8 ---- 5 files changed, 118 insertions(+), 49 deletions(-) diff --git a/docs/examples.ipynb b/docs/examples.ipynb index 819cddc..74db6a4 100644 --- a/docs/examples.ipynb +++ b/docs/examples.ipynb @@ -92,8 +92,8 @@ ")\n", "\n", "# Move the model to the appropriate device and set to eval mode.\n", - "model.to(device)\n", - "model.eval()\n", + "model.pt_model.to(device)\n", + "model.pt_model.eval()\n", "\n", "# Load the competition so we can load the right tokenizer.\n", "metagraph = bt.metagraph(constants.SUBNET_UID)\n", @@ -134,7 +134,7 @@ " pad_token_id=tokenizer.eos_token_id,\n", ")\n", "response = ft.eval.method.generate_output(\n", - " model=model,\n", + " model=model.pt_model,\n", " input_ids=input_ids,\n", " generation_config=generation_config,\n", " device=device,\n", diff --git a/finetune/mining.py b/finetune/mining.py index 5f60c41..b5c35e1 100644 --- a/finetune/mining.py +++ b/finetune/mining.py @@ -34,7 +34,7 @@ from taoverse.model.storage.model_metadata_store import ModelMetadataStore from taoverse.model.storage.remote_model_store import RemoteModelStore from taoverse.model.utils import get_hash_of_two_strings -from transformers import AutoModelForCausalLM, PreTrainedModel +from transformers import AutoModelForCausalLM, AutoTokenizer import constants import finetune as ft @@ -49,7 +49,7 @@ def model_path(base_dir: str, run_id: str) -> str: async def push( - model: PreTrainedModel, + model: Model, repo: str, competition_id: CompetitionId, wallet: bt.wallet, @@ -61,7 +61,7 @@ async def push( """Pushes the model to Hugging Face and publishes it on the chain for evaluation by validators. Args: - model (PreTrainedModel): The model to push. + model (Model): The model to push. ModelId is overwritten based on the other parameters. repo (str): The repo to push to. Must be in format "namespace/name". competition_id (CompetitionId): The competition the miner is participating in. wallet (bt.wallet): The wallet of the Miner uploading the model. @@ -89,24 +89,28 @@ async def push( # First upload the model to HuggingFace. namespace, name = model_utils.validate_hf_repo_id(repo) - model_id = ModelId(namespace=namespace, name=name, competition_id=competition_id) - model_id = await remote_model_store.upload_model( - Model(id=model_id, pt_model=model), model_constraints - ) + # Overwrite the model id with the current information. + model.id = ModelId(namespace=namespace, name=name, competition_id=competition_id) + # Get the new model id which includes hash information. 
+ model_id_with_hash = await remote_model_store.upload_model(model, model_constraints) bt.logging.success("Uploaded model to hugging face.") - secure_hash = get_hash_of_two_strings(model_id.hash, wallet.hotkey.ss58_address) - model_id = replace(model_id, secure_hash=secure_hash) + secure_hash = get_hash_of_two_strings( + model_id_with_hash.hash, wallet.hotkey.ss58_address + ) + model_id_with_hash = replace(model_id_with_hash, secure_hash=secure_hash) - bt.logging.success(f"Now committing to the chain with model_id: {model_id}") + bt.logging.success( + f"Now committing to the chain with model_id: {model_id_with_hash}" + ) # We can only commit to the chain every 20 minutes, so run this in a loop, until # successful. while True: try: await metadata_store.store_model_metadata( - wallet.hotkey.ss58_address, model_id + wallet.hotkey.ss58_address, model_id_with_hash ) bt.logging.info( @@ -119,13 +123,14 @@ async def push( if ( not model_metadata - or model_metadata.id.to_compressed_str() != model_id.to_compressed_str() + or model_metadata.id.to_compressed_str() + != model_id_with_hash.to_compressed_str() ): bt.logging.error( - f"Failed to read back model metadata from the chain. Expected: {model_id}, got: {model_metadata}" + f"Failed to read back model metadata from the chain. Expected: {model_id_with_hash}, got: {model_metadata}" ) raise ValueError( - f"Failed to read back model metadata from the chain. Expected: {model_id}, got: {model_metadata}" + f"Failed to read back model metadata from the chain. Expected: {model_id_with_hash}, got: {model_metadata}" ) bt.logging.success("Committed model to the chain.") @@ -147,17 +152,23 @@ async def push( bt.logging.success("Model set to public") -def save(model: PreTrainedModel, model_dir: str): +def save(model: Model, model_dir: str): """Saves a model to the provided directory""" if not os.path.exists(model_dir): os.makedirs(model_dir, exist_ok=True) # Save the model state to the specified path. - model.save_pretrained( + model.pt_model.save_pretrained( save_directory=model_dir, safe_serialization=True, ) + if model.tokenizer is not None: + model.tokenizer.save_pretrained( + save_directory=model_dir, + safe_serialization=True, + ) + async def get_repo( uid: int, @@ -181,15 +192,35 @@ async def get_repo( return model_utils.get_hf_url(model_metadata) -def load_local_model(model_dir: str, kwargs: Dict[str, Any]) -> PreTrainedModel: +def load_local_model(model_dir: str, kwargs: Dict[str, Any]) -> Model: """Loads a model from a directory.""" - return AutoModelForCausalLM.from_pretrained( + model_id = ModelId( + namespace="local_namespace", + name="local_model", + competition_id=CompetitionId.NONE, + ) + + pt_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=model_dir, local_files_only=True, use_safetensors=True, **kwargs, ) + # Always try to retrieve a tokenizer from the model directory. If we do not find one leave it None on the Model. + tokenizer = None + try: + # Do not use the kwargs for the model load here. If needed in the future a separate kwargs can be plumbed. 
+ tokenizer = AutoTokenizer.from_pretrained( + pretrained_model_name_or_path=model_dir, + local_files_only=True, + use_safetensors=True, + ) + except Exception: + pass + + return Model(id=model_id, pt_model=pt_model, tokenizer=tokenizer) + async def load_remote_model( uid: int, @@ -197,7 +228,7 @@ async def load_remote_model( metagraph: Optional[bt.metagraph] = None, metadata_store: Optional[ModelMetadataStore] = None, remote_model_store: Optional[RemoteModelStore] = None, -) -> PreTrainedModel: +) -> Model: """Loads the model currently being advertised by the Miner with the given UID. Args: @@ -235,7 +266,7 @@ async def load_remote_model( model: Model = await remote_model_store.download_model( model_metadata.id, download_dir, model_constraints ) - return model.pt_model + return model async def load_best_model( @@ -244,7 +275,7 @@ async def load_best_model( metagraph: Optional[bt.metagraph] = None, metadata_store: Optional[ModelMetadataStore] = None, remote_model_store: Optional[RemoteModelStore] = None, -) -> PreTrainedModel: +) -> Model: """Loads the model from the best performing miner to download_dir""" best_uid = ft.graph.best_uid(competition_id=competition_id) if best_uid is None: diff --git a/neurons/miner.py b/neurons/miner.py index e603ee4..b126c68 100644 --- a/neurons/miner.py +++ b/neurons/miner.py @@ -20,7 +20,6 @@ import datetime as dt import math import os -import random import typing import bittensor as bt @@ -28,6 +27,7 @@ import wandb from dotenv import load_dotenv from taoverse.metagraph import utils as metagraph_utils +from taoverse.model.data import Model from taoverse.model.storage.chain.chain_model_metadata_store import ( ChainModelMetadataStore, ) @@ -35,13 +35,10 @@ HuggingFaceModelStore, ) from taoverse.model.storage.model_metadata_store import ModelMetadataStore -from taoverse.utilities import utils from taoverse.utilities import wandb as wandb_utils -from transformers import PreTrainedModel import constants import finetune as ft -from finetune.datasets.subnet.prompting_subset_loader import PromptingSubsetLoader from neurons import config as neuron_config load_dotenv() # take environment variables from .env. @@ -54,7 +51,7 @@ async def load_starting_model( metagraph: bt.metagraph, metadata_store: ModelMetadataStore, kwargs: typing.Dict[str, typing.Any], -) -> PreTrainedModel: +) -> Model: """Loads the model to train based on the provided config.""" # Initialize the model based on the best on the network. diff --git a/tests/finetune/test_mining.py b/tests/finetune/test_mining.py index 906aba7..8070ec1 100644 --- a/tests/finetune/test_mining.py +++ b/tests/finetune/test_mining.py @@ -8,13 +8,14 @@ import bittensor as bt import torch from taoverse.model.data import Model, ModelId +from transformers import AutoTokenizer import constants import finetune as ft from competitions.data import CompetitionId from tests.model.storage.fake_model_metadata_store import FakeModelMetadataStore from tests.model.storage.fake_remote_model_store import FakeRemoteModelStore -from tests.utils import assert_model_equality, get_test_model +from tests.utils import get_test_model class TestMining(unittest.TestCase): @@ -43,18 +44,57 @@ def tearDown(self): def test_model_to_disk_roundtrip(self): """Tests that saving a model to disk and loading it gets the same model.""" + # Use the default model id for local models. 
+ model_id = ModelId( + namespace="local_namespace", + name="local_model", + competition_id=CompetitionId.NONE, + ) + model = Model(id=model_id, pt_model=self.tiny_model, tokenizer=None) + + ft.mining.save(model=model, model_dir=self.model_dir) + retrieved_model = ft.mining.load_local_model( + model_dir=self.model_dir, kwargs={} + ) + + self.assertEqual(str(model), str(retrieved_model)) - ft.mining.save(model=self.tiny_model, model_dir=self.model_dir) - model = ft.mining.load_local_model(model_dir=self.model_dir, kwargs={}) + def test_model_with_tokenizer_to_disk_roundtrip(self): + """Tests that saving a model with tokenizer to disk and loading it gets the same model.""" + # Use the default model id for local models. + model_id = ModelId( + namespace="local_namespace", + name="local_model", + competition_id=CompetitionId.NONE, + ) + tokenizer = AutoTokenizer.from_pretrained("Xenova/gpt-4") + model = Model(id=model_id, pt_model=self.tiny_model, tokenizer=tokenizer) + + ft.mining.save(model=model, model_dir=self.model_dir) + retrieved_model = ft.mining.load_local_model( + model_dir=self.model_dir, kwargs={} + ) - assert_model_equality(self, self.tiny_model, model) + # Overwrite the name of the tokenizer to avoid it using the local path. + retrieved_model.tokenizer.name_or_path = "Xenova/gpt-4" + self.assertEqual(str(model), str(retrieved_model)) def _test_push( - self, min_expected_block: int = 1, competition_id=CompetitionId.B7_MULTI_CHOICE + self, + min_expected_block: int = 1, + competition_id=CompetitionId.B7_MULTI_CHOICE, + tokenizer=None, ): + model_id = ModelId( + namespace="namespace", + name="name", + competition_id=competition_id, + ) + model = Model(id=model_id, pt_model=self.tiny_model, tokenizer=tokenizer) + asyncio.run( ft.mining.push( - model=self.tiny_model, + model=model, wallet=self.wallet, competition_id=competition_id, repo="namespace/name", @@ -66,8 +106,10 @@ def _test_push( ) # Check that the model was uploaded to hugging face. - model: Model = self.remote_store.get_only_model() - assert_model_equality(self, self.tiny_model, model.pt_model) + retrieved_model: Model = self.remote_store.get_only_model() + # Align the model id with the retrieved model as the hash and such will change. + model.id = retrieved_model.id + self.assertEqual(str(model), str(retrieved_model)) # Check that the model ID was published on the chain. model_metadata = asyncio.run( @@ -76,10 +118,12 @@ def _test_push( self.assertGreaterEqual(model_metadata.block, min_expected_block) # Check certain properties of the model metadata. 
- self.assertEqual(model_metadata.id.commit, model.id.commit) - self.assertEqual(model_metadata.id.name, model.id.name) - self.assertEqual(model_metadata.id.namespace, model.id.namespace) - self.assertEqual(model_metadata.id.competition_id, model.id.competition_id) + self.assertEqual(model_metadata.id.commit, retrieved_model.id.commit) + self.assertEqual(model_metadata.id.name, retrieved_model.id.name) + self.assertEqual(model_metadata.id.namespace, retrieved_model.id.namespace) + self.assertEqual( + model_metadata.id.competition_id, retrieved_model.id.competition_id + ) self.metadata_store.reset() self.remote_store.reset() @@ -88,6 +132,11 @@ def test_push_success(self): """Tests that pushing a model to the chain is successful.""" self._test_push() + def test_push_success_tokenizer(self): + """Tests that pushing a model with a tokenizer to the chain is successful.""" + tokenizer = AutoTokenizer.from_pretrained("Xenova/gpt-4") + self._test_push(tokenizer=tokenizer) + def test_push_model_chain_failure(self): """Tests that pushing a model is eventually successful even if pushes to the chain fail.""" diff --git a/tests/utils.py b/tests/utils.py index 4563291..75ec0f3 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -3,14 +3,6 @@ from transformers import LlamaConfig, LlamaForCausalLM, PreTrainedModel -def assert_model_equality( - test_case: unittest.TestCase, model1: PreTrainedModel, model2: PreTrainedModel -): - """Checks if two models are equal.""" - test_case.assertEqual(type(model1), type(model2)) - test_case.assertEqual(str(model1.state_dict()), str(model2.state_dict())) - - def get_test_model() -> PreTrainedModel: """Gets a test model that is small enough to load and store quickly. From 21cfa5ec69dc58ef38b39ba1f732221f2c0f8c7e Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Fri, 29 Nov 2024 16:24:06 -0800 Subject: [PATCH 06/26] Address feedback --- constants/__init__.py | 1 + docs/competitions.md | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/constants/__init__.py b/constants/__init__.py index d25ac10..93e6e42 100644 --- a/constants/__init__.py +++ b/constants/__init__.py @@ -125,6 +125,7 @@ PhiForCausalLM, Qwen2ForCausalLM, ], + tokenizer=None, # Any tokenizer can be used. kwargs={ "torch_dtype": torch.bfloat16, }, diff --git a/docs/competitions.md b/docs/competitions.md index 6f7d29c..36796e2 100644 --- a/docs/competitions.md +++ b/docs/competitions.md @@ -18,6 +18,20 @@ Models submitted to this competition are evaluated on a set of evaluation tasks, [Code Link](https://github.com/macrocosm-os/finetuning/blob/94e8fd92ab4158e1e4a425a9562695eebafa27b1/constants/__init__.py#L128) +## Competition INSTRUCT_8B: + +### Goal + +The goal of this competition is to train a SOTA instruct 8B model. This competition provides more freedom to miners than other competitions: there are no restrictions on the tokenizer used and miner's are allowed to use a wider range of architectures. 
+ +### Evaluation + +The evaluation tasks are the same as the B7_MULTICHOICE competition + +### Definitions + +TODO: Fill in post check-in + # Deprecated Competitions ## Competition 1: SN9_MODEL From bca65e95748cc8eb58bc73499b748adf64e48e39 Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Fri, 29 Nov 2024 16:28:46 -0800 Subject: [PATCH 07/26] Grammar is hard --- docs/competitions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/competitions.md b/docs/competitions.md index 36796e2..d36ad54 100644 --- a/docs/competitions.md +++ b/docs/competitions.md @@ -22,7 +22,7 @@ Models submitted to this competition are evaluated on a set of evaluation tasks, ### Goal -The goal of this competition is to train a SOTA instruct 8B model. This competition provides more freedom to miners than other competitions: there are no restrictions on the tokenizer used and miner's are allowed to use a wider range of architectures. +The goal of this competition is to train a SOTA instruct 8B model. This competition provides more freedom to miners than other competitions: there are no restrictions on the tokenizer used and miners are allowed to use a wider range of architectures. ### Evaluation From 8d02dda72b48f559623da575854870463a9c663a Mon Sep 17 00:00:00 2001 From: Sid Date: Fri, 29 Nov 2024 16:48:18 -0800 Subject: [PATCH 08/26] Fix additional test in test mining. --- tests/finetune/test_mining.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/finetune/test_mining.py b/tests/finetune/test_mining.py index 8070ec1..18caf39 100644 --- a/tests/finetune/test_mining.py +++ b/tests/finetune/test_mining.py @@ -215,15 +215,13 @@ async def test_load_best_model(self): # Upload the model for miner 1. model_store = FakeRemoteModelStore() - model = self._get_model() + model = self.tiny_model await model_store.upload_model( Model( id=miner_1_model_id, pt_model=model, ), - model_constraints=constants.MODEL_CONSTRAINTS_BY_COMPETITION_ID.get( - 1, None - ), + competition=CompetitionId.SN9_MODEL, ) # Verify that miner 1's model is loaded. 
From 4ea8640883cd04314300a6ef8e01a9b399b91380 Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Fri, 29 Nov 2024 16:17:27 -0800 Subject: [PATCH 09/26] Update the model_validation script Download NLTK Fix model scoring Add more logging Fix logging Remove excess logging --- finetune/validation.py | 18 ++-- scripts/model_validation.py | 195 +++++++++++++----------------------- 2 files changed, 81 insertions(+), 132 deletions(-) diff --git a/finetune/validation.py b/finetune/validation.py index 7144921..48b5209 100644 --- a/finetune/validation.py +++ b/finetune/validation.py @@ -23,19 +23,19 @@ import bittensor as bt import torch -import transformers from taoverse.model.competition.data import Competition from taoverse.model.competition.epsilon import EpsilonFunc +from taoverse.model.data import Model from taoverse.model.eval.normalization import normalize_score from taoverse.model.eval.task import EvalTask from transformers import GenerationConfig from finetune.eval.method import ( EvalMethodId, + compute_if_eval, compute_multiple_choice_deviation, compute_reference_loss, compute_text_loss, - compute_if_eval, ) from finetune.eval.sample import EvalSample @@ -134,7 +134,7 @@ class ScoreDetails: def score_model( - model, + model: Model, evals: typing.List[EvalTask], samples: typing.List[typing.List[EvalSample]], competition: Competition, @@ -159,8 +159,8 @@ def score_model( raise ValueError("Model does not have a tokenizer") with torch.inference_mode(): - model.to(device) - model.eval() + model.pt_model.to(device) + model.pt_model.eval() score = 0 score_details = {task.name: ScoreDetails() for task in evals} @@ -179,7 +179,7 @@ def score_model( pad_token_id=tokenizer.eos_token_id, ) raw_score = compute_multiple_choice_deviation( - model=model, + model=model.pt_model, tokenizer=tokenizer, generation_config=compute_mc_generation_config, batches=samples, @@ -187,13 +187,13 @@ def score_model( ) case EvalMethodId.REFERENCE_LOSS: raw_score = compute_reference_loss( - model=model, + model=model.pt_model, batches=samples, device=device, ) case EvalMethodId.TEXT_LOSS: raw_score = compute_text_loss( - model=model, + model=model.pt_model, batches=samples, device=device, pad_token_id=tokenizer.eos_token_id, @@ -208,7 +208,7 @@ def score_model( max_time=5.0, ) raw_score = compute_if_eval( - model=model, + model=model.pt_model, tokenizer=tokenizer, generation_config=compute_if_generation_config, batches=samples, diff --git a/scripts/model_validation.py b/scripts/model_validation.py index 9d35cd9..6ed0506 100644 --- a/scripts/model_validation.py +++ b/scripts/model_validation.py @@ -4,44 +4,25 @@ import argparse import datetime as dt -import math import random import sys +from typing import List +import bittensor as bt +import nltk +from taoverse.metagraph import utils as metagraph_utils from taoverse.model.competition import utils as competition_utils -from taoverse.model.data import Model, ModelId +from taoverse.model.eval.task import EvalTask from taoverse.model.model_updater import ModelUpdater from taoverse.utilities.enum_action import IntEnumAction -from taoverse.utilities.perf_monitor import PerfMonitor -from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig import constants import finetune as ft from competitions.data import CompetitionId +from finetune.datasets.factory import DatasetLoaderFactory +from finetune.datasets.ids import DatasetId from finetune.datasets.subnet.prompting_subset_loader import PromptingSubsetLoader -from finetune.eval.method import 
compute_multiple_choice_deviation - - -def load_model(model_path, competition_id, allow_remote_code, kwargs) -> Model: - model_id = ModelId( - namespace="namespace", name="name", competition_id=competition_id - ) - if allow_remote_code: - pt_model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path=model_path, - trust_remote_code=True, - use_safetensors=True, - **kwargs, - ) - else: - pt_model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path=model_path, - local_files_only=True, - use_safetensors=True, - **kwargs, - ) - - return Model(id=model_id, pt_model=pt_model) +from finetune.eval.sample import EvalSample def main(): @@ -61,12 +42,6 @@ def main(): default=0, help="Random seed to use while loading data. If 0 then randomize.", ) - parser.add_argument( - "--latest_prompting_samples", - type=int, - default=400, - help="Number of most recent prompting samples to eval against", - ) parser.add_argument( "--competition_id", type=CompetitionId, @@ -74,30 +49,9 @@ def main(): action=IntEnumAction, help="competition to mine for (use --list-competitions to get all competitions)", ) - parser.add_argument( - "--allow_remote_code", - action="store_true", - help="If a remote code should be allowed", - ) - parser.add_argument( - "--skip_constraints_check", - action="store_true", - help="If the competition constraints check should be skipped", - ) parser.add_argument( "--list_competitions", action="store_true", help="Print out all competitions" ) - parser.add_argument( - "--tokenizer_override", - action="store_true", - help="If a custom tokenizer should be used rather than the competition one", - ) - parser.add_argument( - "--tokenizer", - type=str, - default="Xenova/gpt-4", - help="Tokenizer", - ) parser.add_argument( "--comp_block", type=int, @@ -106,7 +60,11 @@ def main(): ) args = parser.parse_args() if args.list_competitions: - print(constants.COMPETITION_SCHEDULE_BY_BLOCK) + print( + competition_utils.get_competition_schedule_for_block( + args.comp_block, constants.COMPETITION_SCHEDULE_BY_BLOCK + ) + ) return competition = competition_utils.get_competition_for_block( @@ -115,90 +73,81 @@ def main(): constants.COMPETITION_SCHEDULE_BY_BLOCK, ) + if not competition: + print(f"Competition {args.competition_id} not found.") + return + kwargs = competition.constraints.kwargs.copy() kwargs["use_cache"] = True - print(f"Loading model for competition {args.competition_id}") - load_model_perf = PerfMonitor("Eval: Load model") - with load_model_perf.sample(): - model = load_model( - args.model_path, competition.id, args.allow_remote_code, kwargs - ) - print(load_model_perf.summary_str()) + print(f"Loading tokenizer and model from {args.model_path}") + model = ft.mining.load_local_model(args.model_path, kwargs) - if not args.skip_constraints_check: - if not ModelUpdater.verify_model_satisfies_parameters( - model, competition.constraints - ): - print("Model does not satisfy competition parameters!!!") - return + if competition.constraints.tokenizer: + model.tokenizer = ft.model.load_tokenizer(competition.constraints) - pull_data_perf = PerfMonitor("Eval: Pull data") - sample_data = None + if not ModelUpdater.verify_model_satisfies_parameters( + model, competition.constraints + ): + print("Model does not satisfy competition parameters!!!") + return seed = args.random_seed if args.random_seed else random.randint(0, sys.maxsize) - if args.competition_id == CompetitionId.B7_MULTI_CHOICE: - print("Getting latest sample data from prompting.") - with pull_data_perf.sample(): 
- sample_data = PromptingSubsetLoader( + print("Loading evaluation tasks") + eval_tasks: List[EvalTask] = [] + samples: List[List[EvalSample]] = [] + + # Load data based on the competition. + metagraph = bt.metagraph(constants.PROMPTING_SUBNET_UID) + vali_uids = metagraph_utils.get_high_stake_validators( + metagraph, constants.SAMPLE_VALI_MIN_STAKE + ) + vali_hotkeys = set([metagraph.hotkeys[uid] for uid in vali_uids]) + + for eval_task in competition.eval_tasks: + if eval_task.dataset_id == DatasetId.SYNTHETIC_MMLU: + data_loader = PromptingSubsetLoader( random_seed=seed, - max_samples=args.latest_prompting_samples, - oldest_sample_timestamp=dt.datetime.now() - dt.timedelta(hours=4), + oldest_sample_timestamp=dt.datetime.now(dt.timezone.utc) + - dt.timedelta(hours=6), + validator_hotkeys=vali_hotkeys, ) - else: - print( - f"Competition id: {args.competition_id} has no sample loading logic specified." - ) - return - print(pull_data_perf.summary_str()) - - print("Tokenizing sample data") - if args.tokenizer_override: - tokenizer = AutoTokenizer.from_pretrained( - args.tokenizer, trust_remote_code=args.allow_remote_code - ) - else: - tokenizer = ft.model.load_tokenizer(competition.constraints) - batches = sample_data.tokenize(tokenizer, competition.constraints.sequence_length) - - print("Calculating deviations") - compute_deviation_perf = PerfMonitor("Eval: Compute deviation") - - if args.competition_id == CompetitionId.B7_MULTI_CHOICE: - # Please note, this currently does not include other evaluations that may - # be run as part of the competition. - # These will be included in a future release. - generation_config = GenerationConfig( - max_new_tokens=20, - max_length=competition.constraints.sequence_length, - do_sample=False, - repetition_penalty=1.1, - eos_token_id=tokenizer.eos_token_id, - pad_token_id=tokenizer.eos_token_id, - ) - with compute_deviation_perf.sample(): - deviations = compute_multiple_choice_deviation( - model.pt_model, - tokenizer, - generation_config, - batches, - device=args.device, + else: + data_loader = DatasetLoaderFactory.get_loader( + dataset_id=eval_task.dataset_id, + dataset_kwargs=eval_task.dataset_kwargs, + seed=seed, + validator_hotkeys=vali_hotkeys, ) - else: - print( - f"Competition id: {args.competition_id} has no evaluation logic specified." - ) - return - print(compute_deviation_perf.summary_str()) + if data_loader: + eval_tasks.append(eval_task) + print(f"Loaded {len(data_loader)} samples for task {eval_task.name}") + samples.append( + data_loader.tokenize( + model.tokenizer, competition.constraints.sequence_length + ) + ) - average_model_deviation = ( - sum(deviations) / len(deviations) if len(deviations) > 0 else math.inf + print(f"Scoring model on tasks {eval_tasks}") + # Run each computation in a subprocess so that the GPU is reset between each model. + score, score_details = ft.validation.score_model( + model, + eval_tasks, + samples, + competition, + args.device, ) - print(f"The average deviation for {args.model_path} is {average_model_deviation}") + print(f"Computed score: {score}. 
Details: {score_details}")
 
 
 if __name__ == "__main__":
+    # Make sure we can download the needed nltk modules
+    # Used for generating words in word sorting evals
+    nltk.download("words", raise_on_error=True)
+    # Used for counting sentences in sentence count evals
+    nltk.download("punkt", raise_on_error=True)
+
     main()

From c9891d37ebbf07fdaf6f3f46659ef00157efc7f8 Mon Sep 17 00:00:00 2001
From: Sid
Date: Sat, 30 Nov 2024 19:09:56 -0800
Subject: [PATCH 10/26] Use taoverse based min comp threshold.

---
 neurons/validator.py | 6 ++++--
 requirements.txt     | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/neurons/validator.py b/neurons/validator.py
index e9cbce0..abc3404 100644
--- a/neurons/validator.py
+++ b/neurons/validator.py
@@ -1129,8 +1129,10 @@ async def run_step(self):
         # Align competition_tracker to only track active competitions.
         self.competition_tracker.reset_competitions(active_competition_ids)
         # Update self.weights to the merged values across active competitions.
-        self.weights = self.competition_tracker.get_subnet_weights(competition_schedule)
-        self.weights[self.weights < constants.MIN_WEIGHT_THRESHOLD] = 0.0
+        self.weights = self.competition_tracker.get_subnet_weights(
+            competitions=competition_schedule,
+            min_comp_weight_threshold=constants.MIN_WEIGHT_THRESHOLD
+        )
 
         # Prioritize models for keeping up to the sample_min for the next eval loop.
         # If the model has any significant weight, prioritize by weight with greater weights being kept first.
diff --git a/requirements.txt b/requirements.txt
index 90da8a1..eab2ee5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,4 @@ safetensors
 torch==2.3.1
 transformers==4.44.1
 wandb==0.18.0
-taoverse==1.2.0
+taoverse==1.2.1

From df67658276acc289449b0558d938735d1dc102ae Mon Sep 17 00:00:00 2001
From: Sid
Date: Sat, 30 Nov 2024 19:11:24 -0800
Subject: [PATCH 11/26] Move IfEval weight back to 5% from 10%.

---
 constants/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/constants/__init__.py b/constants/__init__.py
index 93e6e42..549f4c3 100644
--- a/constants/__init__.py
+++ b/constants/__init__.py
@@ -199,7 +199,7 @@
                         method_id=EvalMethodId.MULTIPLE_CHOICE,
                         dataset_id=DatasetId.SYNTHETIC_MMLU,
                         normalization_id=NormalizationId.NONE,
-                        weight=0.75,
+                        weight=0.8,
                     ),
                     EvalTask(
                         name="WORD_SORTING",
@@ -222,7 +222,7 @@
                         method_id=EvalMethodId.IF_EVAL,
                         dataset_id=DatasetId.SYNTHETIC_IF_EVAL,
                         normalization_id=NormalizationId.NONE,
-                        weight=0.1,
+                        weight=0.05,
                     ),
                 ],
             ),
@@ -236,7 +236,7 @@
                         method_id=EvalMethodId.MULTIPLE_CHOICE,
                         dataset_id=DatasetId.SYNTHETIC_MMLU,
                         normalization_id=NormalizationId.NONE,
-                        weight=0.75,
+                        weight=0.8,
                     ),
                     EvalTask(
                         name="WORD_SORTING",
@@ -259,7 +259,7 @@
                         method_id=EvalMethodId.IF_EVAL,
                         dataset_id=DatasetId.SYNTHETIC_IF_EVAL,
                         normalization_id=NormalizationId.NONE,
-                        weight=0.1,
+                        weight=0.05,
                     ),
                 ],
             ),

From 98542bc7d050aa8e2c0c7bad852db7ffa6ea8bbd Mon Sep 17 00:00:00 2001
From: Sid
Date: Sat, 30 Nov 2024 19:13:41 -0800
Subject: [PATCH 12/26] Adjust top miner weight percent given new comp.

---
 constants/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/constants/__init__.py b/constants/__init__.py
index 549f4c3..36e2fae 100644
--- a/constants/__init__.py
+++ b/constants/__init__.py
@@ -78,7 +78,8 @@
 WEIGHT_SYNC_VALI_MIN_STAKE = 100_000
 # Minimum percent of weight on a vali for a miner to be considered a top miner.
 # Since there can be multiple competitions at different reward percentages we can't just check biggest.
-WEIGHT_SYNC_MINER_MIN_PERCENT = 0.10
+# Since we only set weights per competition with a threshold of 0.18 we can just take any percent here.
+WEIGHT_SYNC_MINER_MIN_PERCENT = 0.01
 # The root directory of this project.
 ROOT_DIR = Path(__file__).parent.parent
 # The maximum bytes for the hugging face repo.

From 5c2a5074b06edae3a061a8a4f428911ecb1e35c9 Mon Sep 17 00:00:00 2001
From: Sid
Date: Sat, 30 Nov 2024 19:22:20 -0800
Subject: [PATCH 13/26] Adjust sync block and alpha for increased cycle time.

---
 constants/__init__.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/constants/__init__.py b/constants/__init__.py
index 36e2fae..8dc1ef5 100644
--- a/constants/__init__.py
+++ b/constants/__init__.py
@@ -48,16 +48,16 @@
 # Block the subnet was registered.
 GENESIS_BLOCK = 3138611
 # Define the number of blocks per vali "sync". This cadence is used to align validator behavior for better vtrust.
-SYNC_BLOCK_CADENCE = 180
+SYNC_BLOCK_CADENCE = 270
 # Rough estimate of the number of seconds per block.
 SECONDS_PER_BLOCK = 12
 # Validator weight moving average term.
-# At 0.9 a model will go from 0 -> 0.190 in 2 cycles and from 0 -> 0.83 in 17 cycles.
-ALPHA = 0.9
+# At 0.85 a model will go from 0 -> 0.278 in 2 cycles and from 0 -> 0.833 in 11 cycles.
+ALPHA = 0.85
 # Any miners with a combined competition weight below this threshold will instead receive 0 weight.
 # This is intended to help vtrust in conjunction with a low alpha by handling the tail ends.
-# At 1 eval per 180 blocks, newly winning models will start receiving weight after ~360 blocks.
-# Previously winning models will phase out after ~3060 blocks, at which point only the new winner will have weight.
+# At 1 eval per 270 blocks, newly winning models will start receiving weight after ~540 blocks.
+# Previously winning models will phase out after ~2970 blocks, at which point only the new winner will have weight.
 MIN_WEIGHT_THRESHOLD = 0.18
 
 # The validator WANDB project.

From d04f8fa8b89c4525f7f88870d14e622b697ee0a7 Mon Sep 17 00:00:00 2001
From: Sid
Date: Sat, 30 Nov 2024 23:21:43 -0800
Subject: [PATCH 14/26] Log weights above top miner sync threshold.

---
 neurons/validator.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/neurons/validator.py b/neurons/validator.py
index abc3404..7c6f120 100644
--- a/neurons/validator.py
+++ b/neurons/validator.py
@@ -1131,7 +1131,7 @@ async def run_step(self):
         # Update self.weights to the merged values across active competitions.
         self.weights = self.competition_tracker.get_subnet_weights(
             competitions=competition_schedule,
-            min_comp_weight_threshold=constants.MIN_WEIGHT_THRESHOLD
+            min_comp_weight_threshold=constants.MIN_WEIGHT_THRESHOLD,
         )
 
         # Prioritize models for keeping up to the sample_min for the next eval loop.
@@ -1326,13 +1326,13 @@ def log_step(
         console.print(table)
 
         ws, ui = self.weights.topk(len(self.weights))
-        table = Table(title=f"Weights >= {constants.MIN_WEIGHT_THRESHOLD}")
+        table = Table(title=f"Weights >= {constants.WEIGHT_SYNC_MINER_MIN_PERCENT}")
         table.add_column("uid", justify="right", style="cyan", no_wrap=True)
         table.add_column("weight", style="magenta")
         table.add_column("comp", style="magenta")
         for index, weight in list(zip(ui.tolist(), ws.tolist())):
-            # All remaining weights should be above the threshold so this check mainly filters out 0s.
-            if weight >= constants.MIN_WEIGHT_THRESHOLD:
+            # Show anything with weight high enough to be considered for top model checks.
+ if weight >= constants.WEIGHT_SYNC_MINER_MIN_PERCENT: table.add_row( str(index), str(round(weight, 4)), str(uid_to_competition_id[index]) ) From 3d9c2549520536291cbd95bdb6868580a4cbea22 Mon Sep 17 00:00:00 2001 From: Sid Date: Sat, 30 Nov 2024 23:22:39 -0800 Subject: [PATCH 15/26] Increase eval delay from 4 hours to 5 hours. --- constants/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/constants/__init__.py b/constants/__init__.py index 8dc1ef5..4f87f97 100644 --- a/constants/__init__.py +++ b/constants/__init__.py @@ -102,7 +102,7 @@ kwargs={ "torch_dtype": torch.bfloat16, }, - eval_block_delay=1200, # ~4 hours. + eval_block_delay=1600, # ~5 hours. norm_validation_constraints=NormValidationConstraints( norm_eps_soft=200, norm_eps_soft_percent_threshold=0.15, @@ -130,7 +130,7 @@ kwargs={ "torch_dtype": torch.bfloat16, }, - eval_block_delay=1200, # ~4 hours. + eval_block_delay=1600, # ~5 hours. norm_validation_constraints=NormValidationConstraints( norm_eps_soft=200, norm_eps_soft_percent_threshold=0.15, From 5b8cb1b72395f63e4fe683bfc09f3f1d46fdf910 Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Fri, 29 Nov 2024 14:52:30 -0800 Subject: [PATCH 16/26] Allow benchmark to process multiple competitions --- scripts/run_benchmarks.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/run_benchmarks.py b/scripts/run_benchmarks.py index 5ef42ea..f7d1af8 100644 --- a/scripts/run_benchmarks.py +++ b/scripts/run_benchmarks.py @@ -3,6 +3,7 @@ import dataclasses import json import os +import pickle import shutil import time import traceback @@ -12,7 +13,9 @@ import dotenv import lm_eval import wandb +from huggingface_hub import login from lm_eval.models.huggingface import HFLM +from taoverse.model import utils as model_utils from taoverse.model.competition import utils as competition_utils from taoverse.model.competition.data import Competition from taoverse.model.data import ModelMetadata @@ -23,13 +26,9 @@ HuggingFaceModelStore, ) from transformers import AutoTokenizer -from taoverse.model import utils as model_utils -from huggingface_hub import login - -from utils import benchmark_helpers import constants -import pickle +from utils import benchmark_helpers class CompletedEvalStore: @@ -120,7 +119,11 @@ def _run_benchmarks( "leaderboard_bbh", "leaderboard_gpqa", "leaderboard_ifeval", + "leaderboard_musr", "mmlu", + "agieval_en", + "arc_challenge", + "gsm8k_cot", ], verbosity="DEBUG", batch_size="auto", @@ -128,12 +131,12 @@ def _run_benchmarks( ) -def save_state(state: CompletedEvalStore.State, filepath: str): +def save_state(state: Dict[int, CompletedEvalStore.State], filepath: str): with open(filepath, "wb") as f: pickle.dump(state, f) -def load_state(filepath: str) -> CompletedEvalStore.State: +def load_state(filepath: str) -> Dict[int, CompletedEvalStore.State]: with open(filepath, "rb") as f: return pickle.load(f) @@ -197,9 +200,9 @@ def main(args: argparse.Namespace): step = 0 # Load state from previous runs. - last_model = None + last_model_per_comp = {} try: - last_model = load_state(args.file) + last_model_per_comp = load_state(args.file) except FileNotFoundError: pass @@ -230,7 +233,7 @@ def main(args: argparse.Namespace): repo=f"{model_metadata.id.namespace}/{model_metadata.id.name}", commit=model_metadata.id.commit, ) - if state == last_model: + if state == last_model_per_comp.get(competition.id, None): print( f"Model {state.repo} at commit {state.commit} has already been benchmarked." 
) @@ -270,8 +273,8 @@ def main(args: argparse.Namespace): wandb_run.log(results | lb_results) wandb_run.finish() - last_model = state - save_state(last_model, args.file) + last_model_per_comp[competition.id] = state + save_state(last_model_per_comp, args.file) if step % 50: print("Deleting HF cache.") From 974fd39e90bf3d68d5dd01e1582f51593dacecef Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Fri, 29 Nov 2024 15:15:03 -0800 Subject: [PATCH 17/26] Include competition ID --- scripts/run_benchmarks.py | 1 + utils/benchmark_helpers.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/run_benchmarks.py b/scripts/run_benchmarks.py index f7d1af8..23debb7 100644 --- a/scripts/run_benchmarks.py +++ b/scripts/run_benchmarks.py @@ -267,6 +267,7 @@ def main(args: argparse.Namespace): "uid": uid, "model": model_utils.get_hf_url(model_metadata), "block": model_metadata.block, + "competition_id": competition.id, }, allow_val_change=True, ) diff --git a/utils/benchmark_helpers.py b/utils/benchmark_helpers.py index a8f91be..502c896 100644 --- a/utils/benchmark_helpers.py +++ b/utils/benchmark_helpers.py @@ -296,6 +296,6 @@ def get_leaderboard_scores(results: Dict[str, Any]) -> Dict[str, float]: "bbh": compute_bbh(results), # "math": compute_math(results), "gpqa": compute_gpqa(results), - # "musr": compute_musr(results), + "musr": compute_musr(results), "mmlu_pro": compute_mmlu_pro(results), } From 528abaa77634e8110628878762b34e5520017fde Mon Sep 17 00:00:00 2001 From: Sid Date: Sun, 1 Dec 2024 13:31:08 -0800 Subject: [PATCH 18/26] Bump bittensor to 8.4.3 and taoverse 1.3.0. --- constants/__init__.py | 1 - neurons/validator.py | 9 ++++++--- requirements.txt | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/constants/__init__.py b/constants/__init__.py index 4f87f97..eafa9bd 100644 --- a/constants/__init__.py +++ b/constants/__init__.py @@ -124,7 +124,6 @@ MistralForCausalLM, Phi3ForCausalLM, PhiForCausalLM, - Qwen2ForCausalLM, ], tokenizer=None, # Any tokenizer can be used. kwargs={ diff --git a/neurons/validator.py b/neurons/validator.py index 7c6f120..26ed899 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -126,6 +126,9 @@ def state_path(self) -> str: def __init__(self): self.config = neuron_config.validator_config() + # Manually default to info before overriding with arguments. + # If this is not done then info logging does not work in the cases where other modes are not specified. + bt.logging.set_info() bt.logging(config=self.config) bt.logging.info(f"Starting validator with config: {self.config}") @@ -201,7 +204,7 @@ def __init__(self): self._new_wandb_run() # === Running args === - self.weights = torch.zeros_like(self.metagraph.S) + self.weights = torch.zeros_like(torch.from_numpy(self.metagraph.S)) self.global_step = 0 self.last_epoch = self.metagraph.block.item() @@ -716,7 +719,7 @@ async def _try_set_weights(): netuid=self.config.netuid, wallet=self.wallet, uids=uids, - weights=self.weights, + weights=self.weights.numpy(), wait_for_inclusion=False, version_key=constants.weights_version_key, ) @@ -1114,7 +1117,7 @@ async def run_step(self): # Fill in metagraph sized tensor with the step weights of the evaluated models. 
with self.metagraph_lock: - competition_weights = torch.zeros_like(self.metagraph.S) + competition_weights = torch.zeros_like(torch.from_numpy(self.metagraph.S)) for i, uid_i in enumerate(uids): competition_weights[uid_i] = step_weights[i] diff --git a/requirements.txt b/requirements.txt index eab2ee5..cbb386f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -bittensor==6.9.4 +bittensor==8.4.3 huggingface_hub nltk -numpy==1.26.4 +numpy==2.0.2 python-dotenv rich safetensors torch==2.3.1 transformers==4.44.1 wandb==0.18.0 -taoverse==1.2.1 +taoverse==1.3.0 From 57854d716c5704ad94bd12708c0d011c96aede3b Mon Sep 17 00:00:00 2001 From: Sid Date: Tue, 3 Dec 2024 20:09:09 -0800 Subject: [PATCH 19/26] Bump taoverse to 2.3.1. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cbb386f..081e5b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,4 @@ safetensors torch==2.3.1 transformers==4.44.1 wandb==0.18.0 -taoverse==1.3.0 +taoverse==1.3.1 From 2f9a4aa48c2a52dab10fb00bd7f7e3cdd5ce812b Mon Sep 17 00:00:00 2001 From: Sid Date: Tue, 3 Dec 2024 20:15:18 -0800 Subject: [PATCH 20/26] Bump subnet version to 2.6.0. --- constants/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/constants/__init__.py b/constants/__init__.py index eafa9bd..d399c92 100644 --- a/constants/__init__.py +++ b/constants/__init__.py @@ -33,7 +33,7 @@ # Project Constants. # --------------------------------- -__version__ = "2.5.2" +__version__ = "2.6.0" version_split = __version__.split(".") __spec_version__ = ( (1000 * int(version_split[0])) From 4b2f71d43da96fe18370aab9dcdfdd321bf573df Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Tue, 3 Dec 2024 19:11:35 -0800 Subject: [PATCH 21/26] Set HF to only log warnings+ Remove unnecessary os.environ Clean up --- neurons/validator.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/neurons/validator.py b/neurons/validator.py index 26ed899..527d129 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -47,6 +47,8 @@ from collections import defaultdict import bittensor as bt +from bittensor.utils.btlogging.helpers import all_loggers +from bittensor.utils.btlogging.defines import BITTENSOR_LOGGER_NAME import nltk import torch import wandb @@ -131,6 +133,11 @@ def __init__(self): bt.logging.set_info() bt.logging(config=self.config) + # Setting logging level on bittensor messes with all loggers, which we don't want, so set explicitly to warning here. + for logger in all_loggers(): + if not logger.name.startswith(BITTENSOR_LOGGER_NAME): + logger.setLevel(logging.WARNING) + bt.logging.info(f"Starting validator with config: {self.config}") # === Bittensor objects ==== @@ -733,15 +740,6 @@ async def _try_set_weights(): except: bt.logging.warning("Failed to set weights. 
Trying again later.") - ws, ui = self.weights.topk(len(self.weights)) - table = Table(title="All Weights") - table.add_column("uid", justify="right", style="cyan", no_wrap=True) - table.add_column("weight", style="magenta") - for index, weight in list(zip(ui.tolist(), ws.tolist())): - table.add_row(str(index), str(round(weight, 4))) - console = Console() - console.print(table) - try: bt.logging.debug(f"Setting weights.") await asyncio.wait_for(_try_set_weights(), ttl) From 629f2b481758cd8e6acedcf73d01c9e3610efdb1 Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Tue, 3 Dec 2024 20:49:29 -0800 Subject: [PATCH 22/26] PR feedback --- neurons/validator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/neurons/validator.py b/neurons/validator.py index 527d129..0fcd99b 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -18,6 +18,7 @@ # Due to the implementation of disable_progress_bars(), this has to be the first import+call in the application relating to huggingface import dataclasses +import logging from huggingface_hub.utils import disable_progress_bars from retry import retry @@ -47,11 +48,11 @@ from collections import defaultdict import bittensor as bt -from bittensor.utils.btlogging.helpers import all_loggers -from bittensor.utils.btlogging.defines import BITTENSOR_LOGGER_NAME import nltk import torch import wandb +from bittensor.utils.btlogging.defines import BITTENSOR_LOGGER_NAME +from bittensor.utils.btlogging.helpers import all_loggers from dotenv import load_dotenv from rich.console import Console from rich.table import Table From b24e6b9432341effe7adfcbac240b8f8bfc70b1b Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Tue, 3 Dec 2024 21:12:45 -0800 Subject: [PATCH 23/26] Delay update loop --- neurons/validator.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/neurons/validator.py b/neurons/validator.py index 0fcd99b..5825cc9 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -420,6 +420,9 @@ def update_models(self): # Track how recently we checked the list of top models. last_checked_top_models_time = None + # Delay the first update loop until the metagraph has been synced. + time.sleep(60) + # The below loop iterates across all miner uids and checks to see # if they should be updated. while not self.stop_event.is_set(): From 135120d54e2d4d1f11a84d74a8f630345a45cca0 Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Tue, 3 Dec 2024 21:58:56 -0800 Subject: [PATCH 24/26] Add retries to get the current block number --- neurons/validator.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/neurons/validator.py b/neurons/validator.py index 5825cc9..0ab0feb 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -753,11 +753,16 @@ async def _try_set_weights(): def _get_current_block(self) -> int: """Returns the current block.""" - try: + + @retry(tries=5, delay=1, backoff=2) + def _get_block_with_retry(): return self.subtensor.block + + try: + return _get_block_with_retry() except: bt.logging.debug( - "Failed to get the latest block from the chain. Using the block from the cached metagraph." + f"Failed to get the latest block from the chain. Using the block from the cached metagraph." ) # Network call failed. Fallback to using the block from the metagraph, # even though it'll be a little stale. 
From 1d3ee42a038e7ca8bd1f23f8365e0ae2ae8d874b Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Wed, 4 Dec 2024 07:39:40 -0800 Subject: [PATCH 25/26] Clean up --- neurons/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neurons/validator.py b/neurons/validator.py index 0ab0feb..2934628 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -762,7 +762,7 @@ def _get_block_with_retry(): return _get_block_with_retry() except: bt.logging.debug( - f"Failed to get the latest block from the chain. Using the block from the cached metagraph." + "Failed to get the latest block from the chain. Using the block from the cached metagraph." ) # Network call failed. Fallback to using the block from the metagraph, # even though it'll be a little stale. From 1c3f823bda127feb6a9d28db553bb4d668a36f40 Mon Sep 17 00:00:00 2001 From: rusticluftig Date: Wed, 4 Dec 2024 19:26:40 -0800 Subject: [PATCH 26/26] Change comp to start on Tuesday morning PST --- constants/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/constants/__init__.py b/constants/__init__.py index d399c92..5a2b974 100644 --- a/constants/__init__.py +++ b/constants/__init__.py @@ -140,7 +140,7 @@ ), } -INSTRUCT_8B_BLOCK = 4_423_335 +INSTRUCT_8B_BLOCK = 4_451_695 # Schedule of competitions by block. COMPETITION_SCHEDULE_BY_BLOCK: List[Tuple[int, List[Competition]]] = [