From 9a4fcfe6cf1f4edab98b7d928a697ad9dc596746 Mon Sep 17 00:00:00 2001
From: ijl
Date: Fri, 29 Jul 2022 21:40:01 +0000
Subject: [PATCH] PyDictIter

---
 build.rs                   |   9 +-
 ci/azure-debug.yml         |  13 ++-
 src/ffi/dict.rs            | 212 +++++++++++++++++++++++++++++++++++++
 src/ffi/mod.rs             |   2 +
 src/lib.rs                 |   7 +-
 src/serialize/dataclass.rs |  24 ++---
 src/serialize/dict.rs      |  83 +++++------------
 7 files changed, 265 insertions(+), 85 deletions(-)
 create mode 100644 src/ffi/dict.rs

diff --git a/build.rs b/build.rs
index 38e5a960..98c2df60 100644
--- a/build.rs
+++ b/build.rs
@@ -7,9 +7,12 @@ fn main() {
     println!("cargo:rerun-if-env-changed=CFLAGS");
     println!("cargo:rerun-if-env-changed=LDFLAGS");
     println!("cargo:rerun-if-env-changed=RUSTFLAGS");
+    println!("cargo:rerun-if-env-changed=ORJSON_DISABLE_PYDICTITER");
     println!("cargo:rerun-if-env-changed=ORJSON_DISABLE_YYJSON");
 
-    pyo3_build_config::use_pyo3_cfgs();
+    let py_cfg = pyo3_build_config::get();
+    py_cfg.emit_pyo3_cfgs();
+    let py_version_minor = py_cfg.version.minor;
 
     if let Some(true) = version_check::supports_feature("core_intrinsics") {
         println!("cargo:rustc-cfg=feature=\"intrinsics\"");
@@ -19,6 +22,10 @@ fn main() {
         println!("cargo:rustc-cfg=feature=\"optimize\"");
     }
 
+    if std::env::var("ORJSON_DISABLE_PYDICTITER").is_err() && py_version_minor < 11 {
+        println!("cargo:rustc-cfg=feature=\"pydictiter\"");
+    }
+
     if std::env::var("ORJSON_DISABLE_YYJSON").is_ok() {
         if std::env::var("CARGO_FEATURE_YYJSON").is_ok() {
             panic!("ORJSON_DISABLE_YYJSON and --features=yyjson both enabled.")
diff --git a/ci/azure-debug.yml b/ci/azure-debug.yml
index 5e2faa15..16f626fa 100644
--- a/ci/azure-debug.yml
+++ b/ci/azure-debug.yml
@@ -24,8 +24,19 @@ steps:
   displayName: build dependencies
 - bash: PATH=$(path) $(interpreter) -m pip install --user -r test/requirements.txt -r integration/requirements.txt
   displayName: test dependencies
+
+- bash: PATH=$(path) maturin build --strip $(extra) --compatibility $(compatibility) --interpreter $(interpreter)
+  env:
+    ORJSON_DISABLE_PYDICTITER: 1
+    ORJSON_DISABLE_YYJSON: 1
+  displayName: build debug with disabled features
+- bash: PATH=$(path) $(interpreter) -m pip install --user target/wheels/orjson*.whl
+  displayName: install
+- bash: PATH=$(path) pytest -s -rxX -v test
+  displayName: pytest
+
 - bash: PATH=$(path) maturin build --strip $(extra) --compatibility $(compatibility) --interpreter $(interpreter)
-  displayName: build debug
+  displayName: build debug default
 - bash: PATH=$(path) $(interpreter) -m pip install --user target/wheels/orjson*.whl
   displayName: install
 - bash: PATH=$(path) pytest -s -rxX -v test
diff --git a/src/ffi/dict.rs b/src/ffi/dict.rs
new file mode 100644
index 00000000..85f1577b
--- /dev/null
+++ b/src/ffi/dict.rs
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+//! Iteration over the key/value pairs of a `dict` object.
+//!
+//! With the `pydictiter` feature, which `build.rs` emits for Python minor
+//! versions below 11 unless `ORJSON_DISABLE_PYDICTITER` is set, `PyDictIter`
+//! reads Rust mirrors of CPython's private dict structs directly. Otherwise it
+//! wraps `_PyDict_Next()`.
+
+#[cfg(feature = "pydictiter")]
+use pyo3_ffi::{PyObject, Py_hash_t, Py_ssize_t};
+
+#[cfg(feature = "pydictiter")]
+use std::os::raw::{c_char, c_void};
+
+// dictobject.h
+#[cfg(feature = "pydictiter")]
+#[repr(C)]
+pub struct PyDictObject {
+    pub ob_refcnt: pyo3_ffi::Py_ssize_t,
+    pub ob_type: *mut pyo3_ffi::PyTypeObject,
+    pub ma_used: pyo3_ffi::Py_ssize_t,
+    pub ma_version_tag: u64,
+    pub ma_keys: *mut PyDictKeysObject,
+    pub ma_values: *mut *mut PyObject,
+}
+
+// dict-common.h
+#[cfg(feature = "pydictiter")]
+#[repr(C)]
+pub struct PyDictKeyEntry {
+    pub me_hash: Py_hash_t,
+    pub me_key: *mut PyObject,
+    pub me_value: *mut PyObject,
+}
+
+// dict-common.h
+#[cfg(feature = "pydictiter")]
+#[repr(C)]
+pub struct PyDictKeysObject {
+    pub dk_refcnt: Py_ssize_t,
+    pub dk_size: Py_ssize_t,
+    pub dk_lookup: *mut c_void, // dict_lookup_func
+    pub dk_usable: Py_ssize_t,
+    pub dk_nentries: Py_ssize_t,
+    pub dk_indices: [c_char; 1],
+}
+
+// dictobject.c
+#[allow(non_snake_case)]
+#[cfg(feature = "pydictiter")]
+#[cfg(target_pointer_width = "64")]
+fn DK_IXSIZE(dk: *mut PyDictKeysObject) -> isize {
+    unsafe {
+        if (*dk).dk_size <= 0xff {
+            1
+        } else if (*dk).dk_size <= 0xffff {
+            2
+        } else if (*dk).dk_size <= 0xffffffff {
+            4
+        } else {
+            8
+        }
+    }
+}
+
+// dictobject.c
+#[allow(non_snake_case)]
+#[cfg(feature = "pydictiter")]
+#[cfg(target_pointer_width = "32")]
+fn DK_IXSIZE(dk: *mut PyDictKeysObject) -> isize {
+    unsafe {
+        if (*dk).dk_size <= 0xff {
+            1
+        } else if (*dk).dk_size <= 0xffff {
+            2
+        } else {
+            4
+        }
+    }
+}
+
+// dictobject.c
+#[allow(non_snake_case)]
+#[cfg(feature = "pydictiter")]
+fn DK_ENTRIES(dk: *mut PyDictKeysObject) -> *mut PyDictKeyEntry {
+    unsafe {
+        std::mem::transmute::<*mut [c_char; 1], *mut u8>(std::ptr::addr_of_mut!((*dk).dk_indices))
+            .offset((*dk).dk_size * DK_IXSIZE(dk)) as *mut PyDictKeyEntry
+    }
+}
+
+/// Iterator over the `(key, value)` pointer pairs of a dict, read straight from
+/// the mirrored struct layout above.
+#[cfg(feature = "pydictiter")]
+pub struct PyDictIter {
+    // Index of the next entry slot to inspect.
+    idx: usize,
+    // Number of entry slots to visit; chosen in `from_pyobject()`.
+    len: usize,
+    // The dict's `ma_values`; null when the dict uses a combined table.
+    values_ptr: *mut *mut pyo3_ffi::PyObject,
+    // First `PyDictKeyEntry` of the dict's keys object.
+    indices_ptr: *mut PyDictKeyEntry,
+}
+
+#[cfg(feature = "pydictiter")]
+impl PyDictIter {
+    /// Creates an iterator over `obj`. The entry pointers and length are captured
+    /// here, so the dict must not be resized or mutated while iterating.
+    #[inline]
+    pub fn from_pyobject(obj: *mut pyo3_ffi::PyObject) -> Self {
+        unsafe {
+            let dict_ptr = obj as *mut PyDictObject;
+            let values_ptr = (*dict_ptr).ma_values;
+            // Same bounds as _PyDict_Next(): a split table is dense and holds
+            // `ma_used` values; a combined table has `dk_nentries` slots, some
+            // of which may be deleted.
+            let len = if values_ptr.is_null() {
+                (*(*dict_ptr).ma_keys).dk_nentries
+            } else {
+                (*dict_ptr).ma_used
+            };
+            PyDictIter {
+                indices_ptr: DK_ENTRIES((*dict_ptr).ma_keys),
+                values_ptr,
+                idx: 0,
+                len: len as usize,
+            }
+        }
+    }
+}
+
+#[cfg(feature = "pydictiter")]
+impl Iterator for PyDictIter {
+    type Item = (*mut pyo3_ffi::PyObject, *mut pyo3_ffi::PyObject);
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        unsafe {
+            if unlikely!(self.idx >= self.len) {
+                None
+            } else if !self.values_ptr.is_null() {
+                // Split table: the key lives in the entry, the value in slot
+                // `idx` of the values array.
+                let key = (*(self.indices_ptr.add(self.idx))).me_key;
+                let value = *(self.values_ptr.add(self.idx));
+                self.idx += 1;
+                Some((key, value))
+            } else {
+                // Combined table: skip slots whose value is null.
+                let mut entry_ptr = self.indices_ptr.add(self.idx);
+                while self.idx < self.len && (*entry_ptr).me_value.is_null() {
+                    entry_ptr = entry_ptr.add(1);
+                    self.idx += 1;
+                }
+                // Only empty slots were left, so `entry_ptr` is past the end.
+                if unlikely!(self.idx >= self.len) {
+                    return None;
+                }
+                self.idx += 1;
+                Some(((*entry_ptr).me_key, (*entry_ptr).me_value))
+            }
+        }
+    }
+}
+
+/// Iterator over the `(key, value)` pointer pairs of a dict, backed by
+/// `_PyDict_Next()`.
+#[cfg(not(feature = "pydictiter"))]
+pub struct PyDictIter {
+    dict_ptr: *mut pyo3_ffi::PyObject,
+    // Cursor advanced by `_PyDict_Next()`; starts at 0.
+    pos: isize,
+}
+
+#[cfg(not(feature = "pydictiter"))]
+impl PyDictIter {
+    /// Creates an iterator positioned at the start of `obj`.
+    #[inline]
+    pub fn from_pyobject(obj: *mut pyo3_ffi::PyObject) -> Self {
+        PyDictIter {
+            dict_ptr: obj,
+            pos: 0,
+        }
+    }
+}
+
+#[cfg(not(feature = "pydictiter"))]
+impl Iterator for PyDictIter {
+    type Item = (*mut pyo3_ffi::PyObject, *mut pyo3_ffi::PyObject);
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
+        let mut value: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
+        unsafe {
+            if pyo3_ffi::_PyDict_Next(
+                self.dict_ptr,
+                &mut self.pos,
+                &mut key,
+                &mut value,
+                std::ptr::null_mut(),
+            ) == 1
+            {
+                Some((key, value))
+            } else {
+                None
+            }
+        }
+    }
+}
diff --git a/src/ffi/mod.rs b/src/ffi/mod.rs
index 2a155898..2b7e2481 100644
--- a/src/ffi/mod.rs
+++ b/src/ffi/mod.rs
@@ -2,8 +2,10 @@
 
 mod buffer;
 mod bytes;
+mod dict;
 mod pytype;
 
 pub use buffer::*;
 pub use bytes::*;
+pub use dict::*;
 pub use pytype::*;
diff --git a/src/lib.rs b/src/lib.rs
index e1a44eac..1277becc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -366,12 +366,7 @@ pub unsafe extern "C" fn dumps(
     }
 
     if !kwds.is_null() {
-        let len = ffi!(Py_SIZE(kwds));
-        let mut pos = 0isize;
-        let mut arg: *mut PyObject = null_mut();
-        let mut val: *mut PyObject = null_mut();
-        for _ in 0..=len.saturating_sub(1) {
-            unsafe { _PyDict_Next(kwds, &mut pos, &mut arg, &mut val, null_mut()) };
+        for (arg, val) in crate::ffi::PyDictIter::from_pyobject(kwds) {
             if arg == typeref::DEFAULT {
                 if unlikely!(num_args & 2 == 2) {
                     return raise_dumps_exception(Cow::Borrowed(
diff --git a/src/serialize/dataclass.rs b/src/serialize/dataclass.rs
index 5ca779e7..5f0301fd 100644
--- a/src/serialize/dataclass.rs
+++ b/src/serialize/dataclass.rs
@@ -6,6 +6,7 @@ use crate::serialize::serializer::*;
 use crate::typeref::*;
 use crate::unicode::*;
 
+use crate::ffi::PyDictIter;
 use serde::ser::{Serialize, SerializeMap, Serializer};
 
 use std::ptr::addr_of_mut;
@@ -125,19 +126,7 @@ impl Serialize for DataclassFallbackSerializer {
             return serializer.serialize_map(Some(0)).unwrap().end();
         }
         let mut map = serializer.serialize_map(None).unwrap();
-        let mut pos = 0isize;
-        let mut attr: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
-        let mut field: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
-        for _ in 0..=len - 1 {
-            unsafe {
-                pyo3_ffi::_PyDict_Next(
-                    fields,
-                    addr_of_mut!(pos),
-                    addr_of_mut!(attr),
-                    addr_of_mut!(field),
-                    std::ptr::null_mut(),
-                )
-            };
+        for (attr, field) in PyDictIter::from_pyobject(fields) {
             let field_type = ffi!(PyObject_GetAttr(field, FIELD_TYPE_STR));
             ffi!(Py_DECREF(field_type));
             if unsafe { field_type != FIELD_TYPE.as_ptr() } {
@@ -154,15 +143,16 @@ impl Serialize for DataclassFallbackSerializer {
 
             let value = ffi!(PyObject_GetAttr(self.ptr, attr));
             ffi!(Py_DECREF(value));
-
-            map.serialize_key(key_as_str).unwrap();
-            map.serialize_value(&PyObjectSerializer::new(
+            let pyvalue = PyObjectSerializer::new(
                 value,
                 self.opts,
                 self.default_calls,
                 self.recursion + 1,
                 self.default,
-            ))?
+            );
+
+            map.serialize_key(key_as_str).unwrap();
+            map.serialize_value(&pyvalue)?
         }
         map.end()
     }
diff --git a/src/serialize/dict.rs b/src/serialize/dict.rs
index 04a8239f..2247931c 100644
--- a/src/serialize/dict.rs
+++ b/src/serialize/dict.rs
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+use crate::ffi::PyDictIter;
 use crate::opt::*;
 use crate::serialize::datetime::*;
 use crate::serialize::datetimelike::*;
@@ -12,7 +13,6 @@ use crate::unicode::*;
 use compact_str::CompactString;
 use serde::ser::{Serialize, SerializeMap, Serializer};
 use smallvec::SmallVec;
-use std::ptr::addr_of_mut;
 use std::ptr::NonNull;
 
 pub struct Dict {
@@ -48,19 +48,7 @@ impl Serialize for Dict {
         S: Serializer,
     {
         let mut map = serializer.serialize_map(None).unwrap();
-        let mut pos = 0isize;
-        let mut key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
-        let mut value: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
-        for _ in 0..ffi!(Py_SIZE(self.ptr)) as usize {
-            unsafe {
-                pyo3_ffi::_PyDict_Next(
-                    self.ptr,
-                    addr_of_mut!(pos),
-                    addr_of_mut!(key),
-                    addr_of_mut!(value),
-                    std::ptr::null_mut(),
-                )
-            };
+        for (key, value) in PyDictIter::from_pyobject(self.ptr) {
             if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) {
                 err!(SerializeError::KeyMustBeStr)
             }
@@ -117,19 +105,7 @@ impl Serialize for DictSortedKey {
         let len = ffi!(Py_SIZE(self.ptr)) as usize;
         let mut items: SmallVec<[(&str, *mut pyo3_ffi::PyObject); 8]> =
             SmallVec::with_capacity(len);
-        let mut pos = 0isize;
-        let mut key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
-        let mut value: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
-        for _ in 0..=len - 1 {
-            unsafe {
-                pyo3_ffi::_PyDict_Next(
-                    self.ptr,
-                    addr_of_mut!(pos),
-                    addr_of_mut!(key),
-                    addr_of_mut!(value),
-                    std::ptr::null_mut(),
-                )
-            };
+        for (key, value) in PyDictIter::from_pyobject(self.ptr) {
             if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) {
                 err!(SerializeError::KeyMustBeStr)
             }
@@ -144,16 +120,15 @@ impl Serialize for DictSortedKey {
 
         let mut map = serializer.serialize_map(None).unwrap();
         for (key, val) in items.iter() {
-            map.serialize_entry(
-                key,
-                &PyObjectSerializer::new(
-                    *val,
-                    self.opts,
-                    self.default_calls,
-                    self.recursion + 1,
-                    self.default,
-                ),
-            )?;
+            let pyvalue = PyObjectSerializer::new(
+                *val,
+                self.opts,
+                self.default_calls,
+                self.recursion + 1,
+                self.default,
+            );
+            map.serialize_key(key).unwrap();
+            map.serialize_value(&pyvalue)?;
         }
         map.end()
     }
@@ -292,20 +267,8 @@ impl Serialize for DictNonStrKey {
         let len = ffi!(Py_SIZE(self.ptr)) as usize;
         let mut items: SmallVec<[(CompactString, *mut pyo3_ffi::PyObject); 8]> =
             SmallVec::with_capacity(len);
-        let mut pos = 0isize;
-        let mut key: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
-        let mut value: *mut pyo3_ffi::PyObject = std::ptr::null_mut();
         let opts = self.opts & NOT_PASSTHROUGH;
-        for _ in 0..=len - 1 {
-            unsafe {
-                pyo3_ffi::_PyDict_Next(
-                    self.ptr,
-                    addr_of_mut!(pos),
-                    addr_of_mut!(key),
-                    addr_of_mut!(value),
-                    std::ptr::null_mut(),
-                )
-            };
+        for (key, value) in PyDictIter::from_pyobject(self.ptr) {
             if is_type!(ob_type!(key), STR_TYPE) {
                 let uni = unicode_to_str(key);
                 if unlikely!(uni.is_none()) {
@@ -326,16 +289,16 @@ impl Serialize for DictNonStrKey {
 
         let mut map = serializer.serialize_map(None).unwrap();
         for (key, val) in items.iter() {
-            map.serialize_entry(
-                str_from_slice!(key.as_ptr(), key.len()),
-                &PyObjectSerializer::new(
-                    *val,
-                    self.opts,
-                    self.default_calls,
-                    self.recursion + 1,
-                    self.default,
-                ),
-            )?;
+            let pyvalue = PyObjectSerializer::new(
+                *val,
+                self.opts,
+                self.default_calls,
+                self.recursion + 1,
+                self.default,
+            );
+            let key_as_str = str_from_slice!(key.as_ptr(), key.len());
+            map.serialize_key(key_as_str).unwrap();
+            map.serialize_value(&pyvalue)?;
         }
         map.end()
     }