num_nodes in settings
Almaz Dautov committed Dec 5, 2024
1 parent 0837aee commit 4f7afe2
Showing 6 changed files with 12 additions and 15 deletions.
8 changes: 4 additions & 4 deletions turbo_alignment/cli/train.py
@@ -130,18 +130,18 @@ def reinforce_training(

experiment_settings = pipeline_settings.REINFORCETrainExperimentSettings.parse_file(experiment_settings_path)

policy_models = RayGroup(num_nodes=2, num_gpus_per_node=8, ray_actor_type=pipelines.TrainREINFORCEStrategy)
policy_models = RayGroup(num_nodes=experiment_settings.trainer_settings.num_nodes, num_gpus_per_node=8, ray_actor_type=pipelines.TrainREINFORCEStrategy)
reward_model = RayGroup(num_nodes=1, num_gpus_per_node=1, ray_actor_type=RewardModel)
# reference_model = RayGroup(num_nodes=1, num_gpus_per_node=1, ray_actor_type=ReferenceModel)

# TODO_RLOO if possible hide init inside RayGroup
# TODO add settings fields to reward model
ray.get(policy_models.async_init_model_from_pretrained())
ray.get(reward_model.async_init_model_from_pretrained(rm_model=experiment_settings.reward_model_settings.model_path))
# ray.get(reference_model.async_init_model_from_pretrained(pretrain=experiment_settings.model_settings.model_path))

'''
TODO_RLOO:
1. SEED FIX
TODO_RLOO:
2. PARAMS to REINFORCETrainExperimentSettings
3. if possible hide creating of vllm engines inside trainer
'''
@@ -150,7 +150,7 @@ def reinforce_training(
num_engines=experiment_settings.trainer_settings.actor_settings.vllm_num_engines,
tensor_parallel_size=experiment_settings.trainer_settings.actor_settings.vllm_tensor_parallel_size,
pretrain=str(experiment_settings.model_settings.model_path),
seed=0,
seed=experiment_settings.seed,
enable_prefix_caching=False,
enforce_eager=False,
max_model_len=experiment_settings.trainer_settings.actor_settings.max_model_len,
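Illustrative sketch (not part of this commit, and not the repository's actual RayGroup): in plain Ray terms, num_nodes and num_gpus_per_node describe a placement group with one GPU bundle per node, spread across distinct machines. All resource counts below are placeholders.

import ray
from ray.util.placement_group import placement_group

ray.init()

num_nodes = 2            # in this commit: experiment_settings.trainer_settings.num_nodes
num_gpus_per_node = 8

# One bundle per node; STRICT_SPREAD forces each bundle onto a different node.
bundles = [{'GPU': num_gpus_per_node, 'CPU': num_gpus_per_node} for _ in range(num_nodes)]
pg = placement_group(bundles, strategy='STRICT_SPREAD')
ray.get(pg.ready())  # blocks until the cluster can satisfy the request
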
4 changes: 0 additions & 4 deletions turbo_alignment/generators/chat.py
@@ -43,10 +43,6 @@ def __init__(
# if transformers_settings.num_beams > 1:
# beam_search_params['use_beam_search'] = True
# beam_search_params['best_of'] = transformers_settings.num_beams
print(f'Generation Params:{transformers_settings.stop_strings=}\n{transformers_settings.num_return_sequences=}\n\
{transformers_settings.repetition_penalty=}\n{transformers_settings.temperature=}\n\
{transformers_settings.top_p=}\n{transformers_settings.top_k=}\n\
{custom_generation_settings.skip_special_tokens=}\n{self.eos_token_id=}\n{transformers_settings.max_new_tokens=}', flush=True)

self._sampling_params = SamplingParams(
n=transformers_settings.num_return_sequences,
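The deleted lines were a debug print of the generation parameters that feed the vLLM SamplingParams built just below. As a standalone reference, a minimal sketch of that call, with placeholder values in place of transformers_settings / custom_generation_settings (the mapping of stop_strings to stop is assumed from the surrounding code):

from vllm import SamplingParams

sampling_params = SamplingParams(
    n=1,                       # transformers_settings.num_return_sequences
    temperature=0.7,           # transformers_settings.temperature
    top_p=0.9,                 # transformers_settings.top_p
    top_k=50,                  # transformers_settings.top_k
    repetition_penalty=1.1,    # transformers_settings.repetition_penalty
    max_tokens=1024,           # transformers_settings.max_new_tokens
    stop=['<eos>'],            # transformers_settings.stop_strings
    skip_special_tokens=True,  # custom_generation_settings.skip_special_tokens
)
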
3 changes: 0 additions & 3 deletions turbo_alignment/pipelines/train/reinforce.py
@@ -41,9 +41,6 @@ class ReinforceDataCollator(DataCollatorForTokenClassification):
def torch_call(self, features):
import torch
from transformers.data.data_collator import pad_without_fast_tokenizer_warning

for _ in features:
print(f'{_.keys()=}')

label_name = "label" if "label" in features[0].keys() else "labels"
labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None
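The removed loop only printed each feature's keys. The collator's real job comes from its base class: pad input_ids/attention_mask with the tokenizer and pad the label sequences to the same length. A self-contained sketch using the stock DataCollatorForTokenClassification (not ReinforceDataCollator itself; the tokenizer and feature values are placeholders):

from transformers import AutoTokenizer, DataCollatorForTokenClassification

tokenizer = AutoTokenizer.from_pretrained('gpt2')  # placeholder tokenizer
tokenizer.pad_token = tokenizer.eos_token

collator = DataCollatorForTokenClassification(tokenizer=tokenizer, label_pad_token_id=-100)
features = [
    {'input_ids': [1, 2, 3], 'attention_mask': [1, 1, 1], 'labels': [1, 2, 3]},
    {'input_ids': [4, 5], 'attention_mask': [1, 1], 'labels': [4, 5]},
]
batch = collator(features)
print(batch['input_ids'].shape, batch['labels'].shape)  # both padded to length 3
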
2 changes: 2 additions & 0 deletions turbo_alignment/settings/pipelines/train/reinforce.py
@@ -16,6 +16,8 @@
from typing import Union

class REINFORCETrainerSettings(TrainerSettings):

num_nodes: int = 2
max_new_tokens: int = 1024
stop_token: str = '<eos>'

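The new num_nodes field is what the CLI change above reads via experiment_settings.trainer_settings.num_nodes. A self-contained pydantic sketch of that plumbing (simplified stand-in classes, not the repository's real hierarchy):

from pydantic import BaseModel

class TrainerSettingsSketch(BaseModel):      # stands in for REINFORCETrainerSettings
    num_nodes: int = 2
    max_new_tokens: int = 1024
    stop_token: str = '<eos>'

class ExperimentSettingsSketch(BaseModel):   # stands in for REINFORCETrainExperimentSettings
    seed: int = 0
    trainer_settings: TrainerSettingsSketch = TrainerSettingsSketch()

settings = ExperimentSettingsSketch(seed=42, trainer_settings={'num_nodes': 4})
print(settings.trainer_settings.num_nodes, settings.seed)  # 4 42
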
8 changes: 5 additions & 3 deletions turbo_alignment/trainers/online/reinforce.py
@@ -48,6 +48,7 @@
# FIXME
@dataclass
class REINFORCETrainingArguments(TrainingArguments):
num_nodes: int = 2
max_new_tokens: int = 1024
stop_token: str = '<eos>'

@@ -195,11 +196,15 @@ def __init__(
mean_baseline_coef=args.mean_baseline_coef,
num_generations=args.num_generations,
)

print("Generations Params:\n" + "\n".join([f"{attr}: {getattr(self.generator_transformers_settings, attr, None)}" for attr, _ in self.generator_transformers_settings.__annotations__.items()]))

start = time.time()
self.print_readable_stats()
self.norm_reward_mean, self.norm_reward_std = self.reward_stats(
model=self.model, dataloader=self.get_train_dataloader()
)

logging.info(f'statistics in __init__ elapsed time: {time.time() - start}')
self.print_readable_stats()

@@ -467,9 +472,6 @@ def prediction_step(
ignore_keys: Optional[List[str]] = None,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]:

import logging
logging.info(f'{isinstance(model, DeepSpeedEngine)=}')

with torch.no_grad():
loss, metrics = self.get_batch_loss_metrics(model, inputs, 'eval')

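num_nodes is mirrored onto the trainer-side TrainingArguments subclass as well. A standalone sketch of that dataclass-extension pattern (a stand-in class, not the repository's REINFORCETrainingArguments; output_dir is a placeholder):

from dataclasses import dataclass
from transformers import TrainingArguments

@dataclass
class ReinforceArgsSketch(TrainingArguments):
    num_nodes: int = 2
    max_new_tokens: int = 1024
    stop_token: str = '<eos>'

args = ReinforceArgsSketch(output_dir='/tmp/reinforce_sketch', num_nodes=4)
print(args.num_nodes, args.max_new_tokens)  # 4 1024
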
2 changes: 1 addition & 1 deletion turbo_alignment/trainers/online/reward_actor.py
@@ -12,7 +12,7 @@ def __init__(self, world_size, rank, local_rank, master_addr, master_port):

def init_model_from_pretrained(self, rm_model):
self._setup_distributed()
self.model = AutoModelForSequenceClassification.from_pretrained(rm_model, device_map='cuda', torch_dtype=torch.bfloat16)
self.model = AutoModelForSequenceClassification.from_pretrained(rm_model, device_map='cuda', torch_dtype=torch.bfloat16, attn_implementation='flash_attention_2')
self.tokenizer = AutoTokenizer.from_pretrained(rm_model, trust_remote_code=True)

self.model.config.pad_token_id = self.model.config.eos_token_id
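The only change here is the attn_implementation argument. A minimal standalone sketch of the same load (the checkpoint name is a placeholder; flash_attention_2 requires the flash-attn package and an Ampere-or-newer GPU, otherwise drop the argument):

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

rm_model = 'your-org/your-reward-model'  # placeholder checkpoint
model = AutoModelForSequenceClassification.from_pretrained(
    rm_model,
    device_map='cuda',
    torch_dtype=torch.bfloat16,
    attn_implementation='flash_attention_2',
)
tokenizer = AutoTokenizer.from_pretrained(rm_model, trust_remote_code=True)
model.config.pad_token_id = model.config.eos_token_id
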
