Skip to content

Commit

Permalink
Upgrade to PyO3 0.23 (#1708)
Browse files Browse the repository at this point in the history
* Upgrade to PyO3 0.23

* Macos-12 deprecated?

* Clippy.

* Clippy auto elision.
  • Loading branch information
Narsil authored Dec 31, 2024
1 parent 555d44c commit 3a6504d
Show file tree
Hide file tree
Showing 19 changed files with 283 additions and 154 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ jobs:
strategy:
matrix:
platform:
- runner: macos-12
- runner: macos-13
target: x86_64
- runner: macos-14
target: aarch64
Expand Down
8 changes: 4 additions & 4 deletions bindings/python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@ serde = { version = "1.0", features = ["rc", "derive"] }
serde_json = "1.0"
libc = "0.2"
env_logger = "0.11"
pyo3 = { version = "0.22", features = ["abi3", "abi3-py39"] }
numpy = "0.22"
ndarray = "0.15"
pyo3 = { version = "0.23", features = ["abi3", "abi3-py39"] }
numpy = "0.23"
ndarray = "0.16"
itertools = "0.12"

[dependencies.tokenizers]
path = "../../tokenizers"

[dev-dependencies]
tempfile = "3.10"
pyo3 = { version = "0.22", features = ["auto-initialize"] }
pyo3 = { version = "0.23", features = ["auto-initialize"] }

[features]
defaut = ["pyo3/extension-module"]
1 change: 1 addition & 0 deletions bindings/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dynamic = [
'description',
'license',
'readme',
'version',
]
dependencies = ["huggingface_hub>=0.16.4,<1.0"]

Expand Down
73 changes: 52 additions & 21 deletions bindings/python/src/decoders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,22 +43,48 @@ impl PyDecoder {
pub(crate) fn get_as_subtype(&self, py: Python<'_>) -> PyResult<PyObject> {
let base = self.clone();
Ok(match &self.decoder {
PyDecoderWrapper::Custom(_) => Py::new(py, base)?.into_py(py),
PyDecoderWrapper::Custom(_) => Py::new(py, base)?.into_pyobject(py)?.into_any().into(),
PyDecoderWrapper::Wrapped(inner) => match &*inner.as_ref().read().unwrap() {
DecoderWrapper::Metaspace(_) => Py::new(py, (PyMetaspaceDec {}, base))?.into_py(py),
DecoderWrapper::WordPiece(_) => Py::new(py, (PyWordPieceDec {}, base))?.into_py(py),
DecoderWrapper::ByteFallback(_) => {
Py::new(py, (PyByteFallbackDec {}, base))?.into_py(py)
}
DecoderWrapper::Strip(_) => Py::new(py, (PyStrip {}, base))?.into_py(py),
DecoderWrapper::Fuse(_) => Py::new(py, (PyFuseDec {}, base))?.into_py(py),
DecoderWrapper::ByteLevel(_) => Py::new(py, (PyByteLevelDec {}, base))?.into_py(py),
DecoderWrapper::Replace(_) => Py::new(py, (PyReplaceDec {}, base))?.into_py(py),
DecoderWrapper::BPE(_) => Py::new(py, (PyBPEDecoder {}, base))?.into_py(py),
DecoderWrapper::CTC(_) => Py::new(py, (PyCTCDecoder {}, base))?.into_py(py),
DecoderWrapper::Sequence(_) => {
Py::new(py, (PySequenceDecoder {}, base))?.into_py(py)
}
DecoderWrapper::Metaspace(_) => Py::new(py, (PyMetaspaceDec {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
DecoderWrapper::WordPiece(_) => Py::new(py, (PyWordPieceDec {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
DecoderWrapper::ByteFallback(_) => Py::new(py, (PyByteFallbackDec {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
DecoderWrapper::Strip(_) => Py::new(py, (PyStrip {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
DecoderWrapper::Fuse(_) => Py::new(py, (PyFuseDec {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
DecoderWrapper::ByteLevel(_) => Py::new(py, (PyByteLevelDec {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
DecoderWrapper::Replace(_) => Py::new(py, (PyReplaceDec {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
DecoderWrapper::BPE(_) => Py::new(py, (PyBPEDecoder {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
DecoderWrapper::CTC(_) => Py::new(py, (PyCTCDecoder {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
DecoderWrapper::Sequence(_) => Py::new(py, (PySequenceDecoder {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
},
})
}
Expand All @@ -85,7 +111,7 @@ impl PyDecoder {
e
))
})?;
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new(py, data.as_bytes()).into())
}

fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
Expand Down Expand Up @@ -484,8 +510,8 @@ impl PySequenceDecoder {
Ok((PySequenceDecoder {}, Sequence::new(decoders).into()))
}

fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
PyTuple::new_bound(py, [PyList::empty_bound(py)])
fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<Bound<'p, PyTuple>> {
PyTuple::new(py, [PyList::empty(py)])
}
}

Expand All @@ -504,7 +530,7 @@ impl Decoder for CustomDecoder {
Python::with_gil(|py| {
let decoded = self
.inner
.call_method_bound(py, "decode", (tokens,), None)?
.call_method(py, "decode", (tokens,), None)?
.extract(py)?;
Ok(decoded)
})
Expand All @@ -514,7 +540,7 @@ impl Decoder for CustomDecoder {
Python::with_gil(|py| {
let decoded = self
.inner
.call_method_bound(py, "decode_chain", (tokens,), None)?
.call_method(py, "decode_chain", (tokens,), None)?
.extract(py)?;
Ok(decoded)
})
Expand Down Expand Up @@ -693,7 +719,12 @@ mod test {

let obj = Python::with_gil(|py| {
let py_msp = PyDecoder::new(Metaspace::default().into());
let obj: PyObject = Py::new(py, py_msp).unwrap().into_py(py);
let obj: PyObject = Py::new(py, py_msp)
.unwrap()
.into_pyobject(py)
.unwrap()
.into_any()
.into();
obj
});
let py_seq = PyDecoderWrapper::Custom(Arc::new(RwLock::new(CustomDecoder::new(obj))));
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ impl PyEncoding {
e
))
})?;
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new(py, data.as_bytes()).into())
}

fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
Expand Down
5 changes: 3 additions & 2 deletions bindings/python/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use pyo3::exceptions;
use pyo3::prelude::*;
use pyo3::type_object::PyTypeInfo;
use std::ffi::CString;
use std::fmt::{Display, Formatter, Result as FmtResult};
use tokenizers::tokenizer::Result;

Expand Down Expand Up @@ -35,7 +36,7 @@ impl<T> ToPyResult<T> {
}

pub(crate) fn deprecation_warning(py: Python<'_>, version: &str, message: &str) -> PyResult<()> {
let deprecation_warning = py.import_bound("builtins")?.getattr("DeprecationWarning")?;
let deprecation_warning = py.import("builtins")?.getattr("DeprecationWarning")?;
let full_message = format!("Deprecated in {}: {}", version, message);
pyo3::PyErr::warn_bound(py, &deprecation_warning, &full_message, 0)
pyo3::PyErr::warn(py, &deprecation_warning, &CString::new(full_message)?, 0)
}
22 changes: 17 additions & 5 deletions bindings/python/src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,22 @@ impl PyModel {
pub(crate) fn get_as_subtype(&self, py: Python<'_>) -> PyResult<PyObject> {
let base = self.clone();
Ok(match *self.model.as_ref().read().unwrap() {
ModelWrapper::BPE(_) => Py::new(py, (PyBPE {}, base))?.into_py(py),
ModelWrapper::WordPiece(_) => Py::new(py, (PyWordPiece {}, base))?.into_py(py),
ModelWrapper::WordLevel(_) => Py::new(py, (PyWordLevel {}, base))?.into_py(py),
ModelWrapper::Unigram(_) => Py::new(py, (PyUnigram {}, base))?.into_py(py),
ModelWrapper::BPE(_) => Py::new(py, (PyBPE {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
ModelWrapper::WordPiece(_) => Py::new(py, (PyWordPiece {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
ModelWrapper::WordLevel(_) => Py::new(py, (PyWordLevel {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
ModelWrapper::Unigram(_) => Py::new(py, (PyUnigram {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
})
}
}
Expand Down Expand Up @@ -105,7 +117,7 @@ impl PyModel {
e
))
})?;
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new(py, data.as_bytes()).into())
}

fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
Expand Down
97 changes: 66 additions & 31 deletions bindings/python/src/normalizers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,38 +54,73 @@ impl PyNormalizer {
pub(crate) fn get_as_subtype(&self, py: Python<'_>) -> PyResult<PyObject> {
let base = self.clone();
Ok(match self.normalizer {
PyNormalizerTypeWrapper::Sequence(_) => Py::new(py, (PySequence {}, base))?.into_py(py),
PyNormalizerTypeWrapper::Sequence(_) => Py::new(py, (PySequence {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
PyNormalizerTypeWrapper::Single(ref inner) => match &*inner.as_ref().read().unwrap() {
PyNormalizerWrapper::Custom(_) => Py::new(py, base)?.into_py(py),
PyNormalizerWrapper::Custom(_) => {
Py::new(py, base)?.into_pyobject(py)?.into_any().into()
}
PyNormalizerWrapper::Wrapped(ref inner) => match inner {
NormalizerWrapper::Sequence(_) => {
Py::new(py, (PySequence {}, base))?.into_py(py)
}
NormalizerWrapper::Sequence(_) => Py::new(py, (PySequence {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::BertNormalizer(_) => {
Py::new(py, (PyBertNormalizer {}, base))?.into_py(py)
}
NormalizerWrapper::StripNormalizer(_) => {
Py::new(py, (PyStrip {}, base))?.into_py(py)
}
NormalizerWrapper::Prepend(_) => Py::new(py, (PyPrepend {}, base))?.into_py(py),
NormalizerWrapper::ByteLevel(_) => {
Py::new(py, (PyByteLevel {}, base))?.into_py(py)
}
NormalizerWrapper::StripAccents(_) => {
Py::new(py, (PyStripAccents {}, base))?.into_py(py)
}
NormalizerWrapper::NFC(_) => Py::new(py, (PyNFC {}, base))?.into_py(py),
NormalizerWrapper::NFD(_) => Py::new(py, (PyNFD {}, base))?.into_py(py),
NormalizerWrapper::NFKC(_) => Py::new(py, (PyNFKC {}, base))?.into_py(py),
NormalizerWrapper::NFKD(_) => Py::new(py, (PyNFKD {}, base))?.into_py(py),
NormalizerWrapper::Lowercase(_) => {
Py::new(py, (PyLowercase {}, base))?.into_py(py)
}
NormalizerWrapper::Precompiled(_) => {
Py::new(py, (PyPrecompiled {}, base))?.into_py(py)
Py::new(py, (PyBertNormalizer {}, base))?
.into_pyobject(py)?
.into_any()
.into()
}
NormalizerWrapper::Replace(_) => Py::new(py, (PyReplace {}, base))?.into_py(py),
NormalizerWrapper::Nmt(_) => Py::new(py, (PyNmt {}, base))?.into_py(py),
NormalizerWrapper::StripNormalizer(_) => Py::new(py, (PyStrip {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::Prepend(_) => Py::new(py, (PyPrepend {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::ByteLevel(_) => Py::new(py, (PyByteLevel {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::StripAccents(_) => Py::new(py, (PyStripAccents {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::NFC(_) => Py::new(py, (PyNFC {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::NFD(_) => Py::new(py, (PyNFD {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::NFKC(_) => Py::new(py, (PyNFKC {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::NFKD(_) => Py::new(py, (PyNFKD {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::Lowercase(_) => Py::new(py, (PyLowercase {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::Precompiled(_) => Py::new(py, (PyPrecompiled {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::Replace(_) => Py::new(py, (PyReplace {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
NormalizerWrapper::Nmt(_) => Py::new(py, (PyNmt {}, base))?
.into_pyobject(py)?
.into_any()
.into(),
},
},
})
Expand Down Expand Up @@ -114,7 +149,7 @@ impl PyNormalizer {
e
))
})?;
Ok(PyBytes::new_bound(py, data.as_bytes()).to_object(py))
Ok(PyBytes::new(py, data.as_bytes()).into())
}

fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
Expand Down Expand Up @@ -371,8 +406,8 @@ impl PySequence {
))
}

fn __getnewargs__<'p>(&self, py: Python<'p>) -> Bound<'p, PyTuple> {
PyTuple::new_bound(py, [PyList::empty_bound(py)])
fn __getnewargs__<'p>(&self, py: Python<'p>) -> PyResult<Bound<'p, PyTuple>> {
PyTuple::new(py, [PyList::empty(py)])
}

fn __len__(&self) -> usize {
Expand Down
Loading

0 comments on commit 3a6504d

Please sign in to comment.