From 2acbded98073fe0c61055d2063a63a58184ebb8a Mon Sep 17 00:00:00 2001 From: Jonathan Johnson Date: Tue, 22 Feb 2022 10:30:31 -0800 Subject: [PATCH] Added support for prefix searches --- CHANGELOG.md | 10 + crates/bonsaidb-core/src/connection.rs | 67 +++++- crates/bonsaidb-core/src/key.rs | 205 +++++++++++++++++- crates/bonsaidb-core/src/schema/collection.rs | 38 +++- 4 files changed, 316 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index df4e9756552..b7a973fea03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `v0.2`. Backwards compatibility is now automatically tested to help ensure this sort of issue won't happen in the future again. +### Added + +- `SerializedCollection::list_with_prefix`, + `connection::Collection::list_with_prefix`, and + `connection::View::with_key_prefix` have been added as an easy way to filter + results based on whether the key starts with the given prefix. + + This is supported by a new trait, `IntoPrefixRange`. This trait has been + implemented for all byte-based key implementations as well as for `String`. + ## v0.2.0 ### Breaking Changes diff --git a/crates/bonsaidb-core/src/connection.rs b/crates/bonsaidb-core/src/connection.rs index 91a388ba353..567589e6ba5 100644 --- a/crates/bonsaidb-core/src/connection.rs +++ b/crates/bonsaidb-core/src/connection.rs @@ -13,7 +13,7 @@ use crate::{ document::{ AnyDocumentId, CollectionDocument, CollectionHeader, Document, HasHeader, OwnedDocument, }, - key::Key, + key::{IntoPrefixRange, Key}, permissions::Permissions, schema::{ self, @@ -706,6 +706,38 @@ where ) } + /// Retrieves all documents with ids that start with `prefix`. 
+ /// + /// ```rust + /// use bonsaidb_core::{ + /// connection::Connection, + /// document::OwnedDocument, + /// schema::{Collection, Schematic, SerializedCollection}, + /// Error, + /// }; + /// use serde::{Deserialize, Serialize}; + /// + /// #[derive(Debug, Serialize, Deserialize, Default, Collection)] + /// #[collection(name = "MyCollection", primary_key = String)] + /// # #[collection(core = bonsaidb_core)] + /// pub struct MyCollection; + /// + /// async fn starts_with_a<C: Connection>(db: &C) -> Result<Vec<OwnedDocument>, Error> { + /// db.collection::<MyCollection>() + /// .list_with_prefix(String::from("a")) + /// .await + /// } + /// ``` + pub fn list_with_prefix(&'a self, prefix: Cl::PrimaryKey) -> List<'a, Cn, Cl> + where + Cl::PrimaryKey: IntoPrefixRange, + { + List::new( + PossiblyOwned::Borrowed(self), + prefix.into_prefix_range().map(AnyDocumentId::Deserialized), + ) + } + /// Retrieves all documents. /// /// ```rust @@ -1019,6 +1051,39 @@ where self } + /// Filters for entries in the view with keys that begin with `prefix`. + /// + /// ```rust + /// # bonsaidb_core::__doctest_prelude!(); + /// # fn test_fn<C: Connection>(db: C) -> Result<(), Error> { + /// # tokio::runtime::Runtime::new().unwrap().block_on(async { + /// #[derive(View, Debug, Clone)] + /// #[view(name = "by-name", key = String, collection = MyCollection)] + /// # #[view(core = bonsaidb_core)] + /// struct ByName; + /// + /// // the view's key is a String in this example + /// for mapping in db + /// .view::<ByName>() + /// .with_key_prefix(String::from("a")) + /// .query() + /// .await? + /// { + /// assert!(mapping.key.starts_with("a")); + /// println!("{} in document {:?}", mapping.key, mapping.source); + /// } + /// # Ok(()) + /// # }) + /// # } + /// ``` + pub fn with_key_prefix(mut self, prefix: V::Key) -> Self + where + V::Key: IntoPrefixRange, + { + self.key = Some(QueryKey::Range(prefix.into_prefix_range())); + self + } + /// Sets the access policy for queries.
/// /// ```rust diff --git a/crates/bonsaidb-core/src/key.rs b/crates/bonsaidb-core/src/key.rs index 1f45aedb0db..b2ac0a7b010 100644 --- a/crates/bonsaidb-core/src/key.rs +++ b/crates/bonsaidb-core/src/key.rs @@ -10,7 +10,7 @@ use num_traits::{FromPrimitive, ToPrimitive}; use ordered_varint::{Signed, Unsigned, Variable}; use serde::{Deserialize, Serialize}; -use crate::AnyError; +use crate::{connection::Range, AnyError}; /// A trait that enables a type to convert itself into a `memcmp`-compatible /// sequence of bytes. @@ -56,6 +56,27 @@ pub enum NextValueError { WouldWrap, } +/// A type that can be used as a prefix range in range-based queries. +pub trait IntoPrefixRange: Sized { + /// Returns the value as a prefix-range, which will match all values that + /// start with `self`. + fn into_prefix_range(self) -> Range<Self>; +} + +fn next_byte_sequence(start: &[u8]) -> Option<Vec<u8>> { + let mut end = start.to_vec(); + // Modify the last byte by adding one. If it would wrap, we proceed to the + // next byte. + while let Some(last_byte) = end.pop() { + if let Some(next) = last_byte.checked_add(1) { + end.push(next); + return Some(end); + } + } + + None +} + impl<'k> Key<'k> for Cow<'k, [u8]> { type Error = Infallible; @@ -70,6 +91,33 @@ impl<'k> Key<'k> for Cow<'k, [u8]> { } } +impl<'k> IntoPrefixRange for Cow<'k, [u8]> { + fn into_prefix_range(self) -> Range<Self> { + if let Some(next) = next_byte_sequence(&self) { + Range::from(self..Cow::Owned(next)) + } else { + Range::from(self..)
+ } + } +} + +#[test] +fn cow_prefix_range_tests() { + use std::ops::RangeBounds; + assert!(Cow::<'_, [u8]>::Borrowed(b"a") + .into_prefix_range() + .contains(&Cow::Borrowed(b"aa"))); + assert!(!Cow::<'_, [u8]>::Borrowed(b"a") + .into_prefix_range() + .contains(&Cow::Borrowed(b"b"))); + assert!(Cow::<'_, [u8]>::Borrowed(b"\xff") + .into_prefix_range() + .contains(&Cow::Borrowed(b"\xff\xff"))); + assert!(!Cow::<'_, [u8]>::Borrowed(b"\xff") + .into_prefix_range() + .contains(&Cow::Borrowed(b"\xfe"))); +} + impl<'a> Key<'a> for Vec<u8> { type Error = Infallible; @@ -84,6 +132,31 @@ impl<'a> Key<'a> for Vec<u8> { } } +impl<'k> IntoPrefixRange for Vec<u8> { + fn into_prefix_range(self) -> Range<Self> { + if let Some(next) = next_byte_sequence(&self) { + Range::from(self..next) + } else { + Range::from(self..) + } + } +} + +#[test] +fn vec_prefix_range_tests() { + use std::ops::RangeBounds; + assert!(b"a".to_vec().into_prefix_range().contains(&b"aa".to_vec())); + assert!(!b"a".to_vec().into_prefix_range().contains(&b"b".to_vec())); + assert!(b"\xff" + .to_vec() + .into_prefix_range() + .contains(&b"\xff\xff".to_vec())); + assert!(!b"\xff" + .to_vec() + .into_prefix_range() + .contains(&b"\xfe".to_vec())); +} + impl<'a> Key<'a> for ArcBytes<'a> { type Error = Infallible; @@ -98,6 +171,33 @@ impl<'a> Key<'a> for ArcBytes<'a> { } } +impl<'k> IntoPrefixRange for ArcBytes<'k> { + fn into_prefix_range(self) -> Range<Self> { + if let Some(next) = next_byte_sequence(&self) { + Range::from(self..Self::owned(next)) + } else { + Range::from(self..)
+ } + } +} + +#[test] +fn arcbytes_prefix_range_tests() { + use std::ops::RangeBounds; + assert!(ArcBytes::from(b"a") + .into_prefix_range() + .contains(&ArcBytes::from(b"aa"))); + assert!(!ArcBytes::from(b"a") + .into_prefix_range() + .contains(&ArcBytes::from(b"b"))); + assert!(ArcBytes::from(b"\xff") + .into_prefix_range() + .contains(&ArcBytes::from(b"\xff\xff"))); + assert!(!ArcBytes::from(b"\xff") + .into_prefix_range() + .contains(&ArcBytes::from(b"\xfe"))); +} + impl<'a> Key<'a> for CowBytes<'a> { type Error = Infallible; @@ -112,6 +212,33 @@ impl<'a> Key<'a> for CowBytes<'a> { } } +impl<'k> IntoPrefixRange for CowBytes<'k> { + fn into_prefix_range(self) -> Range<Self> { + if let Some(next) = next_byte_sequence(&self) { + Range::from(self..Self::from(next)) + } else { + Range::from(self..) + } + } +} + +#[test] +fn cowbytes_prefix_range_tests() { + use std::ops::RangeBounds; + assert!(CowBytes::from(&b"a"[..]) + .into_prefix_range() + .contains(&CowBytes::from(&b"aa"[..]))); + assert!(!CowBytes::from(&b"a"[..]) + .into_prefix_range() + .contains(&CowBytes::from(&b"b"[..]))); + assert!(CowBytes::from(&b"\xff"[..]) + .into_prefix_range() + .contains(&CowBytes::from(&b"\xff\xff"[..]))); + assert!(!CowBytes::from(&b"\xff"[..]) + .into_prefix_range() + .contains(&CowBytes::from(&b"\xfe"[..]))); +} + impl<'a> Key<'a> for Bytes { type Error = Infallible; @@ -126,6 +253,33 @@ impl<'a> Key<'a> for Bytes { } } +impl IntoPrefixRange for Bytes { + fn into_prefix_range(self) -> Range<Self> { + if let Some(next) = next_byte_sequence(&self) { + Range::from(self..Self::from(next)) + } else { + Range::from(self..)
+ } + } +} + +#[test] +fn bytes_prefix_range_tests() { + use std::ops::RangeBounds; + assert!(Bytes::from(b"a".to_vec()) + .into_prefix_range() + .contains(&Bytes::from(b"aa".to_vec()))); + assert!(!Bytes::from(b"a".to_vec()) + .into_prefix_range() + .contains(&Bytes::from(b"b".to_vec()))); + assert!(Bytes::from(b"\xff".to_vec()) + .into_prefix_range() + .contains(&Bytes::from(b"\xff\xff".to_vec()))); + assert!(!Bytes::from(b"\xff".to_vec()) + .into_prefix_range() + .contains(&Bytes::from(b"\xfe".to_vec()))); +} + impl<'a> Key<'a> for String { type Error = FromUtf8Error; @@ -140,6 +294,55 @@ impl<'a> Key<'a> for String { } } +impl IntoPrefixRange for String { + fn into_prefix_range(self) -> Range<Self> { + let mut bytes = self.as_bytes().to_vec(); + for (index, char) in self.char_indices().rev() { + let mut next_char = u32::from(char) + 1; + if next_char == 0xd800 { + next_char = 0xE000; + } else if next_char > u32::from(char::MAX) { + continue; + } + + let mut char_bytes = [0; 6]; + bytes.splice( + index.., + char::try_from(next_char) + .unwrap() + .encode_utf8(&mut char_bytes) + .bytes(), + ); + return Range::from(self..Self::from_utf8(bytes).unwrap()); + } + + Range::from(self..)
+ } +} + +#[test] +fn string_prefix_range_tests() { + use std::ops::RangeBounds; + assert!(String::from("a") + .into_prefix_range() + .contains(&String::from("aa"))); + assert!(!String::from("a") + .into_prefix_range() + .contains(&String::from("b"))); + assert!(String::from("\u{d799}") + .into_prefix_range() + .contains(&String::from("\u{d799}a"))); + assert!(!String::from("\u{d799}") + .into_prefix_range() + .contains(&String::from("\u{e000}"))); + assert!(String::from("\u{10ffff}") + .into_prefix_range() + .contains(&String::from("\u{10ffff}a"))); + assert!(!String::from("\u{10ffff}") + .into_prefix_range() + .contains(&String::from("\u{10fffe}"))); +} + impl<'a> Key<'a> for () { type Error = Infallible; diff --git a/crates/bonsaidb-core/src/schema/collection.rs b/crates/bonsaidb-core/src/schema/collection.rs index a6390a5512e..6f63eaf2f67 100644 --- a/crates/bonsaidb-core/src/schema/collection.rs +++ b/crates/bonsaidb-core/src/schema/collection.rs @@ -12,7 +12,7 @@ use crate::{ AnyDocumentId, BorrowedDocument, CollectionDocument, Document, DocumentId, KeyId, OwnedDocument, OwnedDocuments, }, - key::Key, + key::{IntoPrefixRange, Key}, schema::{CollectionName, Schematic}, Error, }; @@ -234,7 +234,7 @@ where /// /// ```rust /// use bonsaidb_core::{ -/// schema::{Collection, CollectionName, DefaultSerialization, Schematic}, +/// schema::{Collection, DefaultSerialization, Schematic}, /// Error, /// }; /// use serde::{Deserialize, Serialize}; @@ -395,6 +395,40 @@ pub trait SerializedCollection: Collection { )) } + /// Retrieves all documents with ids that start with `prefix`. 
+ /// + /// ```rust + /// use bonsaidb_core::{ + /// connection::Connection, + /// document::CollectionDocument, + /// schema::{Collection, Schematic, SerializedCollection}, + /// Error, + /// }; + /// use serde::{Deserialize, Serialize}; + /// + /// #[derive(Debug, Serialize, Deserialize, Default, Collection)] + /// #[collection(name = "MyCollection", primary_key = String)] + /// # #[collection(core = bonsaidb_core)] + /// pub struct MyCollection; + /// + /// async fn starts_with_a<C: Connection>( + /// db: &C, + /// ) -> Result<Vec<CollectionDocument<MyCollection>>, Error> { + /// MyCollection::list_with_prefix(String::from("a"), db).await + /// } + /// ``` + fn list_with_prefix<C>(prefix: Self::PrimaryKey, connection: &'_ C) -> List<'_, C, Self> + where + C: Connection, + Self: Sized, + Self::PrimaryKey: IntoPrefixRange, + { + List(connection::List::new( + connection::PossiblyOwned::Owned(connection.collection::<Self>()), + prefix.into_prefix_range().map(AnyDocumentId::Deserialized), + )) + } + /// Retrieves all documents. /// /// ```rust