[deprecate] Drop Python 3.8 support due to EOL (#3033)
* Drop Python 3.8 support due to EOL

* Apply ruff improvements now that Python 3.8 is no longer supported
tomaarsen authored Nov 6, 2024
1 parent 4b7a2d6 commit 1cb196a
Showing 54 changed files with 122 additions and 89 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -17,7 +17,7 @@ jobs:
name: Run unit tests
strategy:
matrix:
-python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+python-version: ['3.9', '3.10', '3.11', '3.12']
os: [ubuntu-latest, windows-latest]
fail-fast: false
runs-on: ${{ matrix.os }}
2 changes: 1 addition & 1 deletion README.md
@@ -24,7 +24,7 @@ For the **full documentation**, see **[www.SBERT.net](https://www.sbert.net)**.

## Installation

-We recommend **Python 3.8+**, **[PyTorch 1.11.0+](https://pytorch.org/get-started/locally/)**, and **[transformers v4.34.0+](https://github.com/huggingface/transformers)**.
+We recommend **Python 3.9+**, **[PyTorch 1.11.0+](https://pytorch.org/get-started/locally/)**, and **[transformers v4.34.0+](https://github.com/huggingface/transformers)**.

**Install with pip**

2 changes: 1 addition & 1 deletion docs/installation.md
@@ -1,6 +1,6 @@
# Installation

-We recommend **Python 3.8+**, **[PyTorch 1.11.0+](https://pytorch.org/get-started/locally/)**, and **[transformers v4.41.0+](https://github.com/huggingface/transformers)**. There are 5 extra options to install Sentence Transformers:
+We recommend **Python 3.9+**, **[PyTorch 1.11.0+](https://pytorch.org/get-started/locally/)**, and **[transformers v4.41.0+](https://github.com/huggingface/transformers)**. There are 5 extra options to install Sentence Transformers:
* **Default:** This allows for loading, saving, and inference (i.e., getting embeddings) of models.
* **ONNX:** This allows for loading, saving, inference, optimizing, and quantizing of models using the ONNX backend.
* **OpenVINO:** This allows for loading, saving, and inference of models using the OpenVINO backend.
16 changes: 8 additions & 8 deletions examples/training/matryoshka/matryoshka_eval_stsb.py
@@ -5,7 +5,7 @@

import argparse
import os
-from typing import Dict, List, Optional, Tuple, cast
+from typing import Optional, cast

import matplotlib.pyplot as plt
import numpy as np
@@ -21,7 +21,7 @@

# Dimension plot
def _grouped_barplot_ratios(
-    group_name_to_x_to_y: Dict[str, Dict[int, float]], ax: Optional[plt.Axes] = None
+    group_name_to_x_to_y: dict[str, dict[int, float]], ax: Optional[plt.Axes] = None
) -> plt.Axes:
# To save a pandas dependency, do from scratch in matplotlib
if ax is None:
@@ -72,9 +72,9 @@ def _grouped_barplot_ratios(


def plot_across_dimensions(
-    model_name_to_dim_to_score: Dict[str, Dict[int, float]],
+    model_name_to_dim_to_score: dict[str, dict[int, float]],
filename: str,
-    figsize: Tuple[float, float] = (7, 7),
+    figsize: tuple[float, float] = (7, 7),
title: str = "STSB test score for various embedding dimensions (via truncation),\nwith and without Matryoshka loss",
) -> None:
# Sort each by key
@@ -139,8 +139,8 @@ def plot_across_dimensions(

args = parser.parse_args()
plot_filename: str = args.plot_filename
-model_names: List[str] = args.model_names
-DIMENSIONS: List[int] = args.dimensions
+model_names: list[str] = args.model_names
+DIMENSIONS: list[int] = args.dimensions

# Load STSb
stsb_test = load_dataset("mteb/stsbenchmark-sts", split="test")
@@ -153,10 +153,10 @@
)

# Run test_evaluator
-model_name_to_dim_to_score: Dict[str, Dict[int, float]] = {}
+model_name_to_dim_to_score: dict[str, dict[int, float]] = {}
for model_name in tqdm(model_names, desc="Evaluating models"):
model = SentenceTransformer(model_name)
-    dim_to_score: Dict[int, float] = {}
+    dim_to_score: dict[int, float] = {}
for dim in tqdm(DIMENSIONS, desc=f"Evaluating {model_name}"):
output_path = os.path.join(model_name, f"dim-{dim}")
os.makedirs(output_path)
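The typing changes in this file follow PEP 585: from Python 3.9 onward the built-in containers are subscriptable, so the `Dict`/`List`/`Tuple` aliases from `typing` can be dropped. A minimal sketch of the before and after (the variable name is illustrative, not from this file):

# Python 3.8 style: generic aliases had to come from `typing`.
# from typing import Dict, List, Tuple
# scores: Dict[str, List[Tuple[int, float]]] = {}

# Python 3.9+ style (PEP 585): built-ins take subscripts directly.
scores: dict[str, list[tuple[int, float]]] = {"model-a": [(64, 0.81), (128, 0.84)]}
print(scores["model-a"])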
8 changes: 5 additions & 3 deletions examples/training/quora_duplicate_questions/create_splits.py
@@ -481,9 +481,11 @@ def write_mining_files(name, ids, dups):


###### Classification dataset #####
with open("quora-IR-dataset/classification/train_pairs.tsv", "w", encoding="utf8") as fOutTrain, open(
"quora-IR-dataset/classification/dev_pairs.tsv", "w", encoding="utf8"
) as fOutDev, open("quora-IR-dataset/classification/test_pairs.tsv", "w", encoding="utf8") as fOutTest:
with (
open("quora-IR-dataset/classification/train_pairs.tsv", "w", encoding="utf8") as fOutTrain,
open("quora-IR-dataset/classification/dev_pairs.tsv", "w", encoding="utf8") as fOutDev,
open("quora-IR-dataset/classification/test_pairs.tsv", "w", encoding="utf8") as fOutTest,
):
fOutTrain.write("\t".join(["qid1", "qid2", "question1", "question2", "is_duplicate"]) + "\n")
fOutDev.write("\t".join(["qid1", "qid2", "question1", "question2", "is_duplicate"]) + "\n")
fOutTest.write("\t".join(["qid1", "qid2", "question1", "question2", "is_duplicate"]) + "\n")
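The rewrite above relies on parenthesized context managers: several `with` items grouped in parentheses, one per line, with no backslash continuations. CPython's PEG parser accepts this from 3.9 (the syntax was only formally documented in 3.10), which is presumably why ruff applies it once 3.8 is dropped. A self-contained sketch of the pattern, with hypothetical file names:

with (
    open("train.tsv", "w", encoding="utf8") as f_train,
    open("dev.tsv", "w", encoding="utf8") as f_dev,
    open("test.tsv", "w", encoding="utf8") as f_test,
):
    header = "\t".join(["qid1", "qid2", "question1", "question2", "is_duplicate"]) + "\n"
    for f in (f_train, f_dev, f_test):
        f.write(header)  # each handle is closed automatically on exit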
6 changes: 3 additions & 3 deletions examples/unsupervised_learning/CT/train_ct_from_file.py
@@ -55,9 +55,9 @@

################# Read the train corpus #################
train_sentences = []
-with gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(
-    filepath, encoding="utf8"
-) as fIn:
+with (
+    gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(filepath, encoding="utf8") as fIn
+):
for line in tqdm.tqdm(fIn, desc="Read file"):
line = line.strip()
if len(line) >= 10:
@@ -55,9 +55,9 @@

################# Read the train corpus #################
train_sentences = []
-with gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(
-    filepath, encoding="utf8"
-) as fIn:
+with (
+    gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(filepath, encoding="utf8") as fIn
+):
for line in tqdm.tqdm(fIn, desc="Read file"):
line = line.strip()
if len(line) >= 10:
16 changes: 10 additions & 6 deletions examples/unsupervised_learning/MLM/train_mlm.py
@@ -48,9 +48,11 @@

train_sentences = []
train_path = sys.argv[2]
-with gzip.open(train_path, "rt", encoding="utf8") if train_path.endswith(".gz") else open(
-    train_path, encoding="utf8"
-) as fIn:
+with (
+    gzip.open(train_path, "rt", encoding="utf8")
+    if train_path.endswith(".gz")
+    else open(train_path, encoding="utf8") as fIn
+):
for line in fIn:
line = line.strip()
if len(line) >= 10:
@@ -61,9 +63,11 @@
dev_sentences = []
if len(sys.argv) >= 4:
dev_path = sys.argv[3]
-with gzip.open(dev_path, "rt", encoding="utf8") if dev_path.endswith(".gz") else open(
-    dev_path, encoding="utf8"
-) as fIn:
+with (
+    gzip.open(dev_path, "rt", encoding="utf8")
+    if dev_path.endswith(".gz")
+    else open(dev_path, encoding="utf8") as fIn
+):
for line in fIn:
line = line.strip()
if len(line) >= 10:
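In these `with` statements the `as fIn` target binds to whichever handle the conditional expression produced, so a single block serves both gzip and plain-text files. An equivalent, arguably clearer, formulation is a small helper; this is a sketch, and `open_text` plus the path are illustrative, not from the repository:

import gzip

def open_text(path):
    # The conditional picks the opener; either branch returns a text-mode handle.
    if path.endswith(".gz"):
        return gzip.open(path, "rt", encoding="utf8")
    return open(path, encoding="utf8")

with open_text("corpus.txt.gz") as fIn:  # hypothetical corpus file
    for line in fIn:
        line = line.strip()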
@@ -55,9 +55,9 @@

################# Read the train corpus #################
train_samples = []
-with gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(
-    filepath, encoding="utf8"
-) as fIn:
+with (
+    gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(filepath, encoding="utf8") as fIn
+):
for line in tqdm.tqdm(fIn, desc="Read file"):
line = line.strip()
if len(line) >= 10:
6 changes: 3 additions & 3 deletions examples/unsupervised_learning/TSDAE/train_tsdae_from_file.py
@@ -45,9 +45,9 @@

################# Read the train corpus #################
train_sentences = []
-with gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(
-    filepath, encoding="utf8"
-) as fIn:
+with (
+    gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(filepath, encoding="utf8") as fIn
+):
for line in tqdm.tqdm(fIn, desc="Read file"):
line = line.strip()
if len(line) >= 10:
2 changes: 1 addition & 1 deletion index.rst
@@ -28,7 +28,7 @@ Using Sentence Transformer models is elementary:
pip install -U sentence-transformers
-We recommend **Python 3.8+** and **PyTorch 1.11.0+**. See `installation <docs/installation.html>`_ for further installation options.
+We recommend **Python 3.9+** and **PyTorch 1.11.0+**. See `installation <docs/installation.html>`_ for further installation options.

.. code-block:: python
3 changes: 1 addition & 2 deletions pyproject.toml
@@ -11,7 +11,7 @@ authors = [
maintainers = [
{ name = "Tom Aarsen", email = "[email protected]" }
]
requires-python = ">=3.8"
requires-python = ">=3.9"
keywords = [
"Transformer Networks",
"BERT",
@@ -25,7 +25,6 @@ classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
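With `requires-python = ">=3.9"`, pip running under Python 3.8 will skip releases carrying this constraint and fall back to the last compatible version. Some projects additionally guard at import time; a sketch of such a check (this is an assumption, not code from this commit):

import sys

if sys.version_info < (3, 9):
    raise RuntimeError("sentence-transformers requires Python 3.9 or newer")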
3 changes: 2 additions & 1 deletion sentence_transformers/SentenceTransformer.py
@@ -13,10 +13,11 @@
import traceback
import warnings
from collections import OrderedDict
+from collections.abc import Iterable, Iterator
from contextlib import contextmanager
from multiprocessing import Queue
from pathlib import Path
-from typing import Any, Callable, Iterable, Iterator, Literal, overload
+from typing import Any, Callable, Literal, overload

import numpy as np
import torch
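This import reshuffle reflects that since Python 3.9 the `typing` aliases such as `typing.Iterable` are deprecated in favor of the `collections.abc` classes, which became subscriptable under PEP 585. A minimal sketch (the function and names are illustrative):

from collections.abc import Iterable, Iterator

def take(items: Iterable[str], n: int) -> Iterator[str]:
    # collections.abc classes accept subscripts from Python 3.9 onward.
    for i, item in enumerate(items):
        if i >= n:
            return
        yield item

print(list(take(["a", "b", "c"], 2)))  # ['a', 'b']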
8 changes: 5 additions & 3 deletions sentence_transformers/datasets/ParallelSentencesDataset.py
@@ -77,9 +77,11 @@ def load_data(
logger.info("Load " + filepath)
parallel_sentences = []

-with gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(
-    filepath, encoding="utf8"
-) as fIn:
+with (
+    gzip.open(filepath, "rt", encoding="utf8")
+    if filepath.endswith(".gz")
+    else open(filepath, encoding="utf8") as fIn
+):
count = 0
for line in fIn:
sentences = line.strip().split("\t")
@@ -317,8 +317,10 @@ def compute_metrices(

# Encode chunk of corpus
if corpus_embeddings is None:
-with nullcontext() if self.truncate_dim is None else corpus_model.truncate_sentence_embeddings(
-    self.truncate_dim
-):
+with (
+    nullcontext()
+    if self.truncate_dim is None
+    else corpus_model.truncate_sentence_embeddings(self.truncate_dim)
+):
sub_corpus_embeddings = corpus_model.encode(
self.corpus[corpus_start_idx:corpus_end_idx],
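The pattern above selects between `nullcontext()` and a real context manager so that one `with` block covers both the truncated and untruncated cases. A self-contained sketch of the idiom (the `truncated` manager is illustrative, not the library's API):

from contextlib import contextmanager, nullcontext

@contextmanager
def truncated(dim):
    print(f"truncating embeddings to {dim} dimensions")
    yield

truncate_dim = None  # or an int such as 64

# nullcontext() is a no-op stand-in, so the body runs unchanged either way.
with nullcontext() if truncate_dim is None else truncated(truncate_dim):
    print("encoding corpus chunk")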
6 changes: 4 additions & 2 deletions sentence_transformers/evaluation/MSEEvaluator.py
@@ -81,8 +81,10 @@ def __init__(
):
super().__init__()
self.truncate_dim = truncate_dim
-with nullcontext() if self.truncate_dim is None else teacher_model.truncate_sentence_embeddings(
-    self.truncate_dim
-):
+with (
+    nullcontext()
+    if self.truncate_dim is None
+    else teacher_model.truncate_sentence_embeddings(self.truncate_dim)
+):
self.source_embeddings = teacher_model.encode(
source_sentences, show_progress_bar=show_progress_bar, batch_size=batch_size, convert_to_numpy=True
6 changes: 4 additions & 2 deletions sentence_transformers/evaluation/MSEEvaluatorFromDataFrame.py
@@ -79,8 +79,10 @@ def __init__(
self.csv_headers.append(f"{src_lang}-{trg_lang}")

all_source_sentences = list(all_source_sentences)
-with nullcontext() if self.truncate_dim is None else teacher_model.truncate_sentence_embeddings(
-    self.truncate_dim
-):
+with (
+    nullcontext()
+    if self.truncate_dim is None
+    else teacher_model.truncate_sentence_embeddings(self.truncate_dim)
+):
all_src_embeddings = teacher_model.encode(all_source_sentences, batch_size=self.batch_size)
self.teacher_embeddings = {sent: emb for sent, emb in zip(all_source_sentences, all_src_embeddings)}
3 changes: 2 additions & 1 deletion sentence_transformers/evaluation/SequentialEvaluator.py
@@ -1,6 +1,7 @@
from __future__ import annotations

-from typing import TYPE_CHECKING, Iterable
+from collections.abc import Iterable
+from typing import TYPE_CHECKING

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator

3 changes: 2 additions & 1 deletion sentence_transformers/fit_mixin.py
@@ -4,8 +4,9 @@
import logging
import os
import shutil
+from collections.abc import Iterable
from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Iterable
+from typing import TYPE_CHECKING, Any, Callable

import numpy as np
import torch
3 changes: 2 additions & 1 deletion sentence_transformers/losses/AdaptiveLayerLoss.py
@@ -2,7 +2,8 @@

import random
import warnings
-from typing import Any, Iterable
+from collections.abc import Iterable
+from typing import Any

import torch
from torch import Tensor, nn
2 changes: 1 addition & 1 deletion sentence_transformers/losses/BatchAllTripletLoss.py
@@ -1,6 +1,6 @@
from __future__ import annotations

-from typing import Iterable
+from collections.abc import Iterable

from torch import Tensor, nn

@@ -1,6 +1,6 @@
from __future__ import annotations

-from typing import Iterable
+from collections.abc import Iterable

import torch
from torch import Tensor
2 changes: 1 addition & 1 deletion sentence_transformers/losses/BatchHardTripletLoss.py
@@ -1,6 +1,6 @@
from __future__ import annotations

-from typing import Iterable
+from collections.abc import Iterable

import torch
from torch import Tensor, nn
2 changes: 1 addition & 1 deletion sentence_transformers/losses/BatchSemiHardTripletLoss.py
@@ -1,6 +1,6 @@
from __future__ import annotations

-from typing import Iterable
+from collections.abc import Iterable

import torch
from torch import Tensor, nn
3 changes: 2 additions & 1 deletion sentence_transformers/losses/CachedGISTEmbedLoss.py
@@ -1,8 +1,9 @@
from __future__ import annotations

+from collections.abc import Iterable, Iterator
from contextlib import nullcontext
from functools import partial
-from typing import Any, Iterable, Iterator
+from typing import Any

import torch
import tqdm
@@ -1,8 +1,9 @@
from __future__ import annotations

+from collections.abc import Iterable, Iterator
from contextlib import nullcontext
from functools import partial
-from typing import Any, Iterable, Iterator
+from typing import Any

import torch
import tqdm
@@ -1,8 +1,9 @@
from __future__ import annotations

+from collections.abc import Iterable, Iterator
from contextlib import nullcontext
from functools import partial
-from typing import Any, Iterable, Iterator
+from typing import Any

import torch
import tqdm
3 changes: 2 additions & 1 deletion sentence_transformers/losses/CoSENTLoss.py
@@ -1,6 +1,7 @@
from __future__ import annotations

-from typing import Any, Iterable
+from collections.abc import Iterable
+from typing import Any

import torch
from torch import Tensor, nn
3 changes: 2 additions & 1 deletion sentence_transformers/losses/ContrastiveLoss.py
@@ -1,7 +1,8 @@
from __future__ import annotations

+from collections.abc import Iterable
from enum import Enum
-from typing import Any, Iterable
+from typing import Any

import torch.nn.functional as F
from torch import Tensor, nn
2 changes: 1 addition & 1 deletion sentence_transformers/losses/ContrastiveTensionLoss.py
@@ -3,7 +3,7 @@
import copy
import math
import random
-from typing import Iterable
+from collections.abc import Iterable

import numpy as np
import torch