Skip to content

Commit

Permalink
Support get_entity, put_entity, columns of Widecolumns (#126)
Browse files Browse the repository at this point in the history
  • Loading branch information
Congyuwang authored May 15, 2024
1 parent 128ef0f commit 88724dc
Show file tree
Hide file tree
Showing 13 changed files with 684 additions and 569 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/rust-clippy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v2
with:
submodules: recursive

- name: Install Rust toolchain
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af #@v1
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "RocksDict"
version = "0.3.23"
version = "0.3.24-beta0"
edition = "2021"
description = "Rocksdb Python Binding"

Expand Down
860 changes: 300 additions & 560 deletions docs/rocksdict.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/search.js

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions examples/wide_columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from rocksdict import Rdict, Options

test_dict = None
opt = None
path = "./temp_wide_columns"

opt = Options()
opt.create_if_missing(True)
test_dict = Rdict(path, opt)

# write
test_dict.put_entity(key="Guangdong", names=["population", "language", "city"], values=[1.27, "Cantonese", "Shenzhen"]);
test_dict.put_entity(key="Sichuan", names=["language", "city"], values=["Mandarin", "Chengdu"]);

# read
assert test_dict["Guangdong"] == ""
assert test_dict.get_entity("Guangdong") == [("city", "Shenzhen"), ("language", "Cantonese"), ("population", 1.27)]

assert test_dict["Sichuan"] == ""
assert test_dict.get_entity("Sichuan") == [("city", "Chengdu"), ("language", "Mandarin")]

# overwrite
test_dict.put_entity(key="Sichuan", names=["language", "city"], values=["Sichuanhua", "Chengdu"]);
test_dict["Beijing"] = "Beijing"

# assertions
assert test_dict["Beijing"] == "Beijing"
assert test_dict.get_entity("Beijing") == [("", "Beijing")]

assert test_dict["Guangdong"] == ""
assert test_dict.get_entity("Guangdong") == [("city", "Shenzhen"), ("language", "Cantonese"), ("population", 1.27)]

assert test_dict["Sichuan"] == ""
assert test_dict.get_entity("Sichuan") == [("city", "Chengdu"), ("language", "Sichuanhua")]

# iterator
it = test_dict.iter()
it.seek_to_first()

assert it.valid()
assert it.key() == "Beijing"
assert it.value() == "Beijing"
assert it.columns() == [("", "Beijing")]

it.next()
assert it.valid()
assert it.key() == "Guangdong"
assert it.value() == ""
assert it.columns() == [("city", "Shenzhen"), ("language", "Cantonese"), ("population", 1.27)]

it.next()
assert it.valid()
assert it.key() == "Sichuan"
assert it.value() == ""
assert it.columns() == [("city", "Chengdu"), ("language", "Sichuanhua")]

del it, test_dict

Rdict.destroy(path)
59 changes: 59 additions & 0 deletions examples/wide_columns_raw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from rocksdict import Rdict, Options

test_dict = None
opt = None
path = "./temp_wide_columns_raw"

opt = Options(True)
opt.create_if_missing(True)
test_dict = Rdict(path, opt)

# write
test_dict.put_entity(key=b"Guangdong", names=[b"language", b"city"], values=[b"Cantonese", b"Shenzhen"]);
test_dict.put_entity(key=b"Sichuan", names=[b"language", b"city"], values=[b"Mandarin", b"Chengdu"]);

# read
assert test_dict[b"Guangdong"] == b""
assert test_dict.get_entity(b"Guangdong") == [(b"city", b"Shenzhen"), (b"language", b"Cantonese")]

assert test_dict[b"Sichuan"] == b""
assert test_dict.get_entity(b"Sichuan") == [(b"city", b"Chengdu"), (b"language", b"Mandarin")]

# overwrite
test_dict.put_entity(key=b"Sichuan", names=[b"language", b"city"], values=[b"Sichuanhua", b"Chengdu"]);
test_dict[b"Beijing"] = b"Beijing"

# assertions
assert test_dict[b"Beijing"] == b"Beijing"
assert test_dict.get_entity(b"Beijing") == [(b"", b"Beijing")]

assert test_dict[b"Guangdong"] == b""
assert test_dict.get_entity(b"Guangdong") == [(b"city", b"Shenzhen"), (b"language", b"Cantonese")]

assert test_dict[b"Sichuan"] == b""
assert test_dict.get_entity(b"Sichuan") == [(b"city", b"Chengdu"), (b"language", b"Sichuanhua")]

# iterator
it = test_dict.iter()
it.seek_to_first()

assert it.valid()
assert it.key() == b"Beijing"
assert it.value() == b"Beijing"
assert it.columns() == [(b"", b"Beijing")]

it.next()
assert it.valid()
assert it.key() == b"Guangdong"
assert it.value() == b""
assert it.columns() == [(b"city", b"Shenzhen"), (b"language", b"Cantonese")]

it.next()
assert it.valid()
assert it.key() == b"Sichuan"
assert it.value() == b""
assert it.columns() == [(b"city", b"Chengdu"), (b"language", b"Sichuanhua")]

del it, test_dict

Rdict.destroy(path)
2 changes: 1 addition & 1 deletion gen_doc.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash

maturin develop --release || exit
just develop
pdoc -o ./docs/ -d google rocksdict
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "rocksdict"
version = "0.3.23"
version = "0.3.24-beta0"
description = "Rocksdb Python Binding"
long_description_content_type = "text/markdown"
readme = "README.md"
Expand Down
10 changes: 10 additions & 0 deletions python/rocksdict/rocksdict.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -389,10 +389,19 @@ class Rdict:
key: Union[str, int, float, bytes, bool, List[Union[str, int, float, bytes, bool]]],
default: Any = None,
read_opt: Union[ReadOptions, None] = None) -> Any | None: ...
def get_entity(self,
key: Union[str, int, float, bytes, bool, List[Union[str, int, float, bytes, bool]]],
default: Any = None,
read_opt: Union[ReadOptions, None] = None) -> List[Tuple[Any, Any]] | None: ...
def put(self,
key: Union[str, int, float, bytes, bool],
value: Any,
write_opt: Union[WriteOptions, None] = None) -> None: ...
def put_entity(self,
key: Union[str, int, float, bytes, bool],
names: List[Any],
values: List[Any],
write_opt: Union[WriteOptions, None] = None) -> None: ...
def delete(self, key: Union[str, int, float, bytes, bool], write_opt: Union[WriteOptions, None] = None) -> None: ...
def key_may_exist(self,
key: Union[str, int, float, bytes, bool],
Expand Down Expand Up @@ -464,6 +473,7 @@ class RdictIter:
def prev(self) -> None: ...
def key(self) -> Any: ...
def value(self) -> Any: ...
def columns(self) -> List[Tuple[Any, Any]]: ...

class IngestExternalFileOptions:
def __init__(self) -> None: ...
Expand Down
3 changes: 2 additions & 1 deletion src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ pub(crate) fn decode_value(
return Ok(PyBytes::new_bound(py, bytes).to_object(py));
}
match bytes.first() {
None => Err(PyException::new_err("Unknown value type")),
// deal with empty value returned by entities
None => Ok(PyString::new_bound(py, "").to_object(py)),
Some(byte) => match byte {
1 => Ok(PyBytes::new_bound(py, &bytes[1..]).to_object(py)),
2 => {
Expand Down
27 changes: 26 additions & 1 deletion src/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use core::slice;
use libc::{c_char, c_uchar, size_t};
use pyo3::exceptions::PyException;
use pyo3::prelude::*;
use rocksdb::{AsColumnFamilyRef, UnboundColumnFamily};
use pyo3::types::{PyList, PyTuple};
use rocksdb::{AsColumnFamilyRef, Iterable as _, UnboundColumnFamily};
use std::ptr::null_mut;
use std::sync::Arc;

Expand Down Expand Up @@ -285,6 +286,30 @@ impl RdictIter {
Ok(py.None())
}
}

/// Returns the current wide-column.
///
/// Returns:
/// A list of `(name, value)` tuples.
/// If the value is not an entity, returns a single-column
/// with default column name (empty bytes/string).
/// None or default value if the key does not exist.
pub fn columns(&self, py: Python) -> PyResult<PyObject> {
if self.valid() {
let columns = unsafe {
rocksdb::WideColumns::from_c(librocksdb_sys::rocksdb_iter_columns(self.inner))
};
let result = PyList::empty_bound(py);
for column in columns.iter() {
let name = decode_value(py, column.name, &self.loads, self.raw_mode)?;
let value = decode_value(py, column.value, &self.loads, self.raw_mode)?;
result.append(PyTuple::new_bound(py, [name, value]))?;
}
Ok(result.to_object(py))
} else {
Ok(py.None())
}
}
}

impl Drop for RdictIter {
Expand Down
119 changes: 116 additions & 3 deletions src/rdict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ use crate::{
};
use pyo3::exceptions::{PyException, PyKeyError};
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyList};
use pyo3::types::{PyDict, PyList, PyTuple};
use rocksdb::{
ColumnFamilyDescriptor, FlushOptions, LiveFile, ReadOptions, UnboundColumnFamily, WriteOptions,
DEFAULT_COLUMN_FAMILY_NAME,
ColumnFamilyDescriptor, FlushOptions, Iterable as _, LiveFile, ReadOptions,
UnboundColumnFamily, WriteOptions, DEFAULT_COLUMN_FAMILY_NAME,
};
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
Expand Down Expand Up @@ -406,6 +406,69 @@ impl Rdict {
}
}

/// Get a wide-column from a key.
///
/// Args:
/// key: a single key or list of keys.
/// default: the default value to return if key not found.
/// read_opt: override preset read options
/// (or use Rdict.set_read_options to preset a read options used by default).
///
/// Returns:
/// A list of `(name, value)` tuples.
/// If the value is not an entity, returns a single-column
/// with default column name (empty bytes/string).
/// None or default value if the key does not exist.
#[inline]
#[pyo3(signature = (key, default = None, read_opt = None))]
fn get_entity(
&self,
key: &Bound<PyAny>,
default: Option<&Bound<PyAny>>,
read_opt: Option<&ReadOptionsPy>,
py: Python,
) -> PyResult<Option<PyObject>> {
let db = self.get_db()?;
let read_opt_option = match read_opt {
None => None,
Some(opt) => Some(opt.to_read_options(self.opt_py.raw_mode, py)?),
};
let read_opt = match &read_opt_option {
None => &self.read_opt,
Some(opt) => opt,
};
let cf = match &self.column_family {
None => {
self.get_column_family_handle(DEFAULT_COLUMN_FAMILY_NAME)?
.cf
}
Some(cf) => cf.clone(),
};
let key_bytes = encode_key(key, self.opt_py.raw_mode)?;
let column_result = db
.get_entity_cf_opt(&cf, key_bytes, read_opt)
.map_err(|e| PyException::new_err(e.to_string()))?;
match column_result {
None => {
// try to return default value
if let Some(default) = default {
Ok(Some(default.to_object(py)))
} else {
Ok(None)
}
}
Some(columns) => {
let result = PyList::empty_bound(py);
for column in columns.iter() {
let name = decode_value(py, column.name, &self.loads, self.opt_py.raw_mode)?;
let value = decode_value(py, column.value, &self.loads, self.opt_py.raw_mode)?;
result.append(PyTuple::new_bound(py, [name, value]))?;
}
Ok(Some(result.to_object(py)))
}
}
}

fn __setitem__(&self, key: &Bound<PyAny>, value: &Bound<PyAny>) -> PyResult<()> {
self.put(key, value, None)
}
Expand Down Expand Up @@ -441,6 +504,56 @@ impl Rdict {
.map_err(|e| PyException::new_err(e.to_string()))
}

/// Insert a wide-column.
///
/// The length of `names` and `values` must be the same.
///
/// Args:
/// key: the key.
/// names: the names of the columns.
/// values: the values of the columns.
/// write_opt: override preset write options
/// (or use Rdict.set_write_options to preset a write options used by default).
#[inline]
#[pyo3(signature = (key, names, values, write_opt = None))]
fn put_entity(
&self,
key: &Bound<PyAny>,
names: Vec<Bound<PyAny>>,
values: Vec<Bound<PyAny>>,
write_opt: Option<&WriteOptionsPy>,
) -> PyResult<()> {
let db = self.get_db()?;
let key = encode_key(key, self.opt_py.raw_mode)?;
let write_opt_option = write_opt.map(WriteOptions::from);
let write_opt = match &write_opt_option {
None => &self.write_opt,
Some(opt) => opt,
};
let cf = match &self.column_family {
None => {
self.get_column_family_handle(DEFAULT_COLUMN_FAMILY_NAME)?
.cf
}
Some(cf) => cf.clone(),
};
if names.len() != values.len() {
return Err(PyException::new_err(
"names and values must have the same length",
));
}
let mut names_vec = Vec::with_capacity(names.len());
let mut values_vec = Vec::with_capacity(values.len());
for name in names.iter() {
names_vec.push(encode_value(name, &self.dumps, self.opt_py.raw_mode)?);
}
for value in values.iter() {
values_vec.push(encode_value(value, &self.dumps, self.opt_py.raw_mode)?);
}
db.put_entity_cf_opt(&cf, key, &names_vec, &values_vec, write_opt)
.map_err(|e| PyException::new_err(e.to_string()))
}

fn __contains__(&self, key: &Bound<PyAny>) -> PyResult<bool> {
let db = self.get_db()?;
let key = encode_key(key, self.opt_py.raw_mode)?;
Expand Down
Loading

0 comments on commit 88724dc

Please sign in to comment.