Skip to content

Commit

Permalink
[docs] Update the training snippets for some losses that should use the v3 Trainer (#2987)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomaarsen authored Oct 15, 2024
1 parent a4be00f commit a1db32d
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 38 deletions.
26 changes: 14 additions & 12 deletions sentence_transformers/losses/Matryoshka2dLoss.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,21 +95,23 @@ def __init__(
 Example:
 ::
-from sentence_transformers import SentenceTransformer, losses, InputExample
-from torch.utils.data import DataLoader
+from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
+from datasets import Dataset
 model = SentenceTransformer("microsoft/mpnet-base")
-train_examples = [
-InputExample(texts=['Anchor 1', 'Positive 1']),
-InputExample(texts=['Anchor 2', 'Positive 2']),
-]
-train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=32)
-train_loss = losses.MultipleNegativesRankingLoss(model=model)
-train_loss = losses.Matryoshka2dLoss(model, train_loss, [768, 512, 256, 128, 64])
-model.fit(
-[(train_dataloader, train_loss)],
-epochs=10,
+train_dataset = Dataset.from_dict({
+"anchor": ["It's nice weather outside today.", "He drove to work."],
+"positive": ["It's so sunny.", "He took the car to the office."],
+})
+loss = losses.MultipleNegativesRankingLoss(model)
+loss = losses.Matryoshka2dLoss(model, loss, [768, 512, 256, 128, 64])
+trainer = SentenceTransformerTrainer(
+model=model,
+train_dataset=train_dataset,
+loss=loss,
 )
+trainer.train()
 """
 matryoshka_loss = MatryoshkaLoss(
 model,
Expand Down
26 changes: 14 additions & 12 deletions sentence_transformers/losses/MatryoshkaLoss.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,21 +101,23 @@ def __init__(
 Example:
 ::
-from sentence_transformers import SentenceTransformer, losses, InputExample
-from torch.utils.data import DataLoader
+from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
+from datasets import Dataset
 model = SentenceTransformer("microsoft/mpnet-base")
-train_examples = [
-InputExample(texts=['Anchor 1', 'Positive 1']),
-InputExample(texts=['Anchor 2', 'Positive 2']),
-]
-train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=32)
-train_loss = losses.MultipleNegativesRankingLoss(model=model)
-train_loss = losses.MatryoshkaLoss(model, train_loss, [768, 512, 256, 128, 64])
-model.fit(
-[(train_dataloader, train_loss)],
-epochs=10,
+train_dataset = Dataset.from_dict({
+"anchor": ["It's nice weather outside today.", "He drove to work."],
+"positive": ["It's so sunny.", "He took the car to the office."],
+})
+loss = losses.MultipleNegativesRankingLoss(model)
+loss = losses.MatryoshkaLoss(model, loss, [768, 512, 256, 128, 64])
+trainer = SentenceTransformerTrainer(
+model=model,
+train_dataset=train_dataset,
+loss=loss,
 )
+trainer.train()
 """
 super().__init__()
 self.model = model
Expand Down
33 changes: 19 additions & 14 deletions sentence_transformers/losses/MegaBatchMarginLoss.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,25 +59,30 @@ def __init__(
 Example:
 ::
-from sentence_transformers import SentenceTransformer, InputExample, losses
-from torch.utils.data import DataLoader
+from sentence_transformers import SentenceTransformer, SentenceTransformerTrainingArguments, SentenceTransformerTrainer, losses
+from datasets import Dataset
-model = SentenceTransformer('all-MiniLM-L6-v2')
-total_examples = 500
 train_batch_size = 250
 train_mini_batch_size = 32
-train_examples = [
-InputExample(texts=[f"This is sentence number {i}", f"This is sentence number {i+1}"]) for i in range(total_examples)
-]
-train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=train_batch_size)
-train_loss = losses.MegaBatchMarginLoss(model=model, mini_batch_size=train_mini_batch_size)
-model.fit(
-[(train_dataloader, train_loss)],
-epochs=10,
+model = SentenceTransformer('all-MiniLM-L6-v2')
+train_dataset = Dataset.from_dict({
+"anchor": [f"This is sentence number {i}" for i in range(500)],
+"positive": [f"This is sentence number {i}" for i in range(1, 501)],
+})
+loss = losses.MegaBatchMarginLoss(model=model, mini_batch_size=train_mini_batch_size)
+args = SentenceTransformerTrainingArguments(
+output_dir="output",
+per_device_train_batch_size=train_batch_size,
+)
+trainer = SentenceTransformerTrainer(
+model=model,
+args=args,
+train_dataset=train_dataset,
+loss=loss,
 )
+trainer.train()
 """
 super().__init__()
 self.model = model
Expand Down

0 comments on commit a1db32d

Please sign in to comment.