Skip to content

Commit

Permalink
Added support for prefix searches
Browse files Browse the repository at this point in the history
  • Loading branch information
ecton committed Feb 22, 2022
1 parent 99c9b61 commit 2acbded
Show file tree
Hide file tree
Showing 4 changed files with 316 additions and 4 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
`v0.2`. Backwards compatibility is now automatically tested to help ensure
this sort of issue won't happen in the future again.

### Added

- `SerializedCollection::list_with_prefix`,
`connection::Collection::list_with_prefix`, and
`connection::View::with_key_prefix` have been added as an easy way to filter
results based on whether the key starts with the given prefix.

This is supported by a new trait, `IntoPrefixRange`. This trait has been
implemented for all byte-based key implementations as well as for `String`.

## v0.2.0

### Breaking Changes
Expand Down
67 changes: 66 additions & 1 deletion crates/bonsaidb-core/src/connection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::{
document::{
AnyDocumentId, CollectionDocument, CollectionHeader, Document, HasHeader, OwnedDocument,
},
key::Key,
key::{IntoPrefixRange, Key},
permissions::Permissions,
schema::{
self,
Expand Down Expand Up @@ -706,6 +706,38 @@ where
)
}

/// Lists every document whose id begins with `prefix`.
///
/// The prefix is converted into a range of ids through
/// [`IntoPrefixRange::into_prefix_range`], and that range is handed to the
/// returned [`List`] builder, which borrows this collection.
///
/// ```rust
/// use bonsaidb_core::{
///     connection::Connection,
///     document::OwnedDocument,
///     schema::{Collection, Schematic, SerializedCollection},
///     Error,
/// };
/// use serde::{Deserialize, Serialize};
///
/// #[derive(Debug, Serialize, Deserialize, Default, Collection)]
/// #[collection(name = "MyCollection", primary_key = String)]
/// # #[collection(core = bonsaidb_core)]
/// pub struct MyCollection;
///
/// async fn starts_with_a<C: Connection>(db: &C) -> Result<Vec<OwnedDocument>, Error> {
///     db.collection::<MyCollection>()
///         .list_with_prefix(String::from("a"))
///         .await
/// }
/// ```
pub fn list_with_prefix(&'a self, prefix: Cl::PrimaryKey) -> List<'a, Cn, Cl>
where
    Cl::PrimaryKey: IntoPrefixRange,
{
    // Turn the prefix into a range of primary keys, then wrap each bound as
    // an already-deserialized document id.
    let id_range = prefix.into_prefix_range().map(AnyDocumentId::Deserialized);
    List::new(PossiblyOwned::Borrowed(self), id_range)
}

/// Retrieves all documents.
///
/// ```rust
Expand Down Expand Up @@ -1019,6 +1051,39 @@ where
self
}

/// Restricts the query to view entries whose key begins with `prefix`.
///
/// The prefix is converted with [`IntoPrefixRange::into_prefix_range`] and
/// stored as a range filter, replacing any key filter previously set on
/// this builder.
///
/// ```rust
/// # bonsaidb_core::__doctest_prelude!();
/// # fn test_fn<C: Connection>(db: C) -> Result<(), Error> {
/// # tokio::runtime::Runtime::new().unwrap().block_on(async {
/// #[derive(View, Debug, Clone)]
/// #[view(name = "by-name", key = String, collection = MyCollection)]
/// # #[view(core = bonsaidb_core)]
/// struct ByName;
///
/// // the view's key is a `String` in this example
/// for mapping in db
///     .view::<ByName>()
///     .with_key_prefix(String::from("a"))
///     .query()
///     .await?
/// {
///     assert!(mapping.key.starts_with("a"));
///     println!("{} in document {:?}", mapping.key, mapping.source);
/// }
/// # Ok(())
/// # })
/// # }
/// ```
pub fn with_key_prefix(mut self, prefix: V::Key) -> Self
where
    V::Key: IntoPrefixRange,
{
    let prefix_range = prefix.into_prefix_range();
    self.key = Some(QueryKey::Range(prefix_range));
    self
}

/// Sets the access policy for queries.
///
/// ```rust
Expand Down
205 changes: 204 additions & 1 deletion crates/bonsaidb-core/src/key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use num_traits::{FromPrimitive, ToPrimitive};
use ordered_varint::{Signed, Unsigned, Variable};
use serde::{Deserialize, Serialize};

use crate::AnyError;
use crate::{connection::Range, AnyError};

/// A trait that enables a type to convert itself into a `memcmp`-compatible
/// sequence of bytes.
Expand Down Expand Up @@ -56,6 +56,27 @@ pub enum NextValueError {
WouldWrap,
}

/// A type that can be used as a prefix range in range-based queries.
///
/// The implementations in this module build the range from the prefix itself
/// (as the start) up to the next value that no longer starts with the prefix.
/// When no such value exists (for example, a byte prefix made up entirely of
/// `0xff` bytes), the range is left without an upper bound.
pub trait IntoPrefixRange: Sized {
/// Returns the value as a prefix-range, which will match all values that
/// start with `self`.
///
/// This consumes `self`; the prefix is moved into the returned range.
fn into_prefix_range(self) -> Range<Self>;
}

/// Computes the smallest byte sequence that sorts after every sequence
/// beginning with `start`.
///
/// Trailing `0xff` bytes cannot be incremented, so they are discarded and the
/// last remaining byte is bumped by one instead. Returns `None` when `start`
/// is empty or consists solely of `0xff` bytes, because no such upper bound
/// exists.
fn next_byte_sequence(start: &[u8]) -> Option<Vec<u8>> {
    // Locate the right-most byte that can be incremented without wrapping.
    let bump_at = start.iter().rposition(|&byte| byte != u8::MAX)?;

    // Keep everything up to and including that byte, then increment it.
    let mut end = start[..=bump_at].to_vec();
    end[bump_at] += 1;
    Some(end)
}

impl<'k> Key<'k> for Cow<'k, [u8]> {
type Error = Infallible;

Expand All @@ -70,6 +91,33 @@ impl<'k> Key<'k> for Cow<'k, [u8]> {
}
}

impl<'k> IntoPrefixRange for Cow<'k, [u8]> {
    /// Builds the range of byte strings that begin with `self`.
    ///
    /// The range runs from `self` up to the sequence produced by
    /// `next_byte_sequence`; when that helper yields nothing, the range has
    /// no upper bound.
    fn into_prefix_range(self) -> Range<Self> {
        match next_byte_sequence(&self) {
            Some(upper) => Range::from(self..Cow::Owned(upper)),
            None => Range::from(self..),
        }
    }
}

#[test]
fn cow_prefix_range_tests() {
    use std::ops::RangeBounds;

    // An ordinary prefix includes longer keys and excludes the next sibling.
    let a_prefix = Cow::<'_, [u8]>::Borrowed(b"a").into_prefix_range();
    assert!(a_prefix.contains(&Cow::Borrowed(b"aa")));
    assert!(!a_prefix.contains(&Cow::Borrowed(b"b")));

    // A prefix of 0xff still includes longer keys and excludes smaller ones.
    let max_prefix = Cow::<'_, [u8]>::Borrowed(b"\xff").into_prefix_range();
    assert!(max_prefix.contains(&Cow::Borrowed(b"\xff\xff")));
    assert!(!max_prefix.contains(&Cow::Borrowed(b"\xfe")));
}

impl<'a> Key<'a> for Vec<u8> {
type Error = Infallible;

Expand All @@ -84,6 +132,31 @@ impl<'a> Key<'a> for Vec<u8> {
}
}

impl IntoPrefixRange for Vec<u8> {
    /// Builds the range of byte vectors that begin with `self`.
    ///
    /// The range runs from `self` up to the sequence produced by
    /// `next_byte_sequence`; when that helper yields nothing, the range has
    /// no upper bound.
    fn into_prefix_range(self) -> Range<Self> {
        match next_byte_sequence(&self) {
            Some(upper) => Range::from(self..upper),
            None => Range::from(self..),
        }
    }
}

#[test]
fn vec_prefix_range_tests() {
    use std::ops::RangeBounds;

    // An ordinary prefix includes longer keys and excludes the next sibling.
    let a_prefix = b"a".to_vec().into_prefix_range();
    assert!(a_prefix.contains(&b"aa".to_vec()));
    assert!(!a_prefix.contains(&b"b".to_vec()));

    // A prefix of 0xff still includes longer keys and excludes smaller ones.
    let max_prefix = b"\xff".to_vec().into_prefix_range();
    assert!(max_prefix.contains(&b"\xff\xff".to_vec()));
    assert!(!max_prefix.contains(&b"\xfe".to_vec()));
}

impl<'a> Key<'a> for ArcBytes<'a> {
type Error = Infallible;

Expand All @@ -98,6 +171,33 @@ impl<'a> Key<'a> for ArcBytes<'a> {
}
}

impl<'k> IntoPrefixRange for ArcBytes<'k> {
    /// Builds the range of byte strings that begin with `self`.
    ///
    /// The range runs from `self` up to the sequence produced by
    /// `next_byte_sequence`; when that helper yields nothing, the range has
    /// no upper bound.
    fn into_prefix_range(self) -> Range<Self> {
        match next_byte_sequence(&self) {
            Some(upper) => Range::from(self..Self::owned(upper)),
            None => Range::from(self..),
        }
    }
}

#[test]
fn arcbytes_prefix_range_tests() {
    use std::ops::RangeBounds;

    // An ordinary prefix includes longer keys and excludes the next sibling.
    let a_prefix = ArcBytes::from(b"a").into_prefix_range();
    assert!(a_prefix.contains(&ArcBytes::from(b"aa")));
    assert!(!a_prefix.contains(&ArcBytes::from(b"b")));

    // A prefix of 0xff still includes longer keys and excludes smaller ones.
    let max_prefix = ArcBytes::from(b"\xff").into_prefix_range();
    assert!(max_prefix.contains(&ArcBytes::from(b"\xff\xff")));
    assert!(!max_prefix.contains(&ArcBytes::from(b"\xfe")));
}

impl<'a> Key<'a> for CowBytes<'a> {
type Error = Infallible;

Expand All @@ -112,6 +212,33 @@ impl<'a> Key<'a> for CowBytes<'a> {
}
}

impl<'k> IntoPrefixRange for CowBytes<'k> {
    /// Builds the range of byte strings that begin with `self`.
    ///
    /// The range runs from `self` up to the sequence produced by
    /// `next_byte_sequence`; when that helper yields nothing, the range has
    /// no upper bound.
    fn into_prefix_range(self) -> Range<Self> {
        match next_byte_sequence(&self) {
            Some(upper) => Range::from(self..Self::from(upper)),
            None => Range::from(self..),
        }
    }
}

#[test]
fn cowbytes_prefix_range_tests() {
    use std::ops::RangeBounds;

    // An ordinary prefix includes longer keys and excludes the next sibling.
    let a_prefix = CowBytes::from(&b"a"[..]).into_prefix_range();
    assert!(a_prefix.contains(&CowBytes::from(&b"aa"[..])));
    assert!(!a_prefix.contains(&CowBytes::from(&b"b"[..])));

    // A prefix of 0xff still includes longer keys and excludes smaller ones.
    let max_prefix = CowBytes::from(&b"\xff"[..]).into_prefix_range();
    assert!(max_prefix.contains(&CowBytes::from(&b"\xff\xff"[..])));
    assert!(!max_prefix.contains(&CowBytes::from(&b"\xfe"[..])));
}

impl<'a> Key<'a> for Bytes {
type Error = Infallible;

Expand All @@ -126,6 +253,33 @@ impl<'a> Key<'a> for Bytes {
}
}

impl IntoPrefixRange for Bytes {
    /// Builds the range of byte strings that begin with `self`.
    ///
    /// The range runs from `self` up to the sequence produced by
    /// `next_byte_sequence`; when that helper yields nothing, the range has
    /// no upper bound.
    fn into_prefix_range(self) -> Range<Self> {
        match next_byte_sequence(&self) {
            Some(upper) => Range::from(self..Self::from(upper)),
            None => Range::from(self..),
        }
    }
}

#[test]
fn bytes_prefix_range_tests() {
    use std::ops::RangeBounds;

    // An ordinary prefix includes longer keys and excludes the next sibling.
    let a_prefix = Bytes::from(b"a".to_vec()).into_prefix_range();
    assert!(a_prefix.contains(&Bytes::from(b"aa".to_vec())));
    assert!(!a_prefix.contains(&Bytes::from(b"b".to_vec())));

    // A prefix of 0xff still includes longer keys and excludes smaller ones.
    let max_prefix = Bytes::from(b"\xff".to_vec()).into_prefix_range();
    assert!(max_prefix.contains(&Bytes::from(b"\xff\xff".to_vec())));
    assert!(!max_prefix.contains(&Bytes::from(b"\xfe".to_vec())));
}

impl<'a> Key<'a> for String {
type Error = FromUtf8Error;

Expand All @@ -140,6 +294,55 @@ impl<'a> Key<'a> for String {
}
}

impl IntoPrefixRange for String {
    /// Builds the range of strings that begin with `self`.
    ///
    /// The upper bound is found by walking the characters from the end: the
    /// first character that has a successor is replaced by that successor and
    /// everything after it is dropped. Characters equal to `char::MAX` have no
    /// successor and are skipped, and the surrogate gap is stepped over by
    /// mapping `0xd800` to `0xE000`. If no character can be advanced
    /// (including when the string is empty), the range has no upper bound.
    fn into_prefix_range(self) -> Range<Self> {
        let upper_bound = self.char_indices().rev().find_map(|(offset, current)| {
            let successor = match u32::from(current) + 1 {
                // Surrogates are not valid `char`s; jump past the gap.
                0xd800 => 0xE000,
                // No successor exists; try the preceding character instead.
                past_max if past_max > u32::from(char::MAX) => return None,
                in_range => in_range,
            };
            let successor = char::try_from(successor).unwrap();

            // Everything before the bumped character is kept verbatim.
            let mut upper = String::with_capacity(offset + successor.len_utf8());
            upper.push_str(&self[..offset]);
            upper.push(successor);
            Some(upper)
        });

        match upper_bound {
            Some(upper) => Range::from(self..upper),
            None => Range::from(self..),
        }
    }
}

#[test]
fn string_prefix_range_tests() {
    use std::ops::RangeBounds;

    // An ordinary prefix includes longer keys and excludes the next sibling.
    let a_prefix = String::from("a").into_prefix_range();
    assert!(a_prefix.contains(&String::from("aa")));
    assert!(!a_prefix.contains(&String::from("b")));

    // U+D7FF is the last scalar value before the surrogate gap, so its
    // successor must skip to U+E000, which is the exclusive upper bound.
    let before_surrogates = String::from("\u{d7ff}").into_prefix_range();
    assert!(before_surrogates.contains(&String::from("\u{d7ff}a")));
    assert!(!before_surrogates.contains(&String::from("\u{e000}")));

    // `char::MAX` has no successor, so the range has no upper bound.
    let max_prefix = String::from("\u{10ffff}").into_prefix_range();
    assert!(max_prefix.contains(&String::from("\u{10ffff}a")));
    assert!(!max_prefix.contains(&String::from("\u{10fffe}")));

    // A trailing `char::MAX` is skipped and the preceding character is
    // advanced instead, giving "b" as the exclusive upper bound.
    let trailing_max = String::from("a\u{10ffff}").into_prefix_range();
    assert!(trailing_max.contains(&String::from("a\u{10ffff}z")));
    assert!(!trailing_max.contains(&String::from("b")));
}

impl<'a> Key<'a> for () {
type Error = Infallible;

Expand Down
Loading

0 comments on commit 2acbded

Please sign in to comment.