-
Notifications
You must be signed in to change notification settings - Fork 236
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(rust): introduce catalog interface for rust module
- Loading branch information
Showing
6 changed files
with
375 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// SPDX-FileCopyrightText: Copyright The Lance Authors | ||
|
||
pub(crate) mod catalog_trait; | ||
pub(crate) mod dataset_identifier; | ||
pub(crate) mod namespace; | ||
|
||
pub use catalog_trait::Catalog; | ||
pub use dataset_identifier::DatasetIdentifier; | ||
pub use namespace::Namespace; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// SPDX-FileCopyrightText: Copyright The Lance Authors | ||
|
||
use crate::catalog::dataset_identifier::DatasetIdentifier; | ||
use crate::catalog::namespace::Namespace; | ||
use crate::dataset::Dataset; | ||
use std::collections::HashMap; | ||
|
||
pub trait Catalog { | ||
/// List all datasets under a specified namespace. | ||
fn list_datasets(&self, namespace: &Namespace) -> Vec<DatasetIdentifier>; | ||
|
||
/// Create a new dataset in the catalog. | ||
fn create_dataset( | ||
&self, | ||
identifier: &DatasetIdentifier, | ||
location: &str, | ||
) -> Result<Dataset, String>; | ||
|
||
/// Check if a dataset exists in the catalog. | ||
fn dataset_exists(&self, identifier: &DatasetIdentifier) -> bool; | ||
|
||
/// Drop a dataset from the catalog. | ||
fn drop_dataset(&self, identifier: &DatasetIdentifier) -> Result<(), String>; | ||
|
||
/// Drop a dataset from the catalog and purge the metadata. | ||
fn drop_dataset_with_purge( | ||
&self, | ||
identifier: &DatasetIdentifier, | ||
purge: &bool, | ||
) -> Result<(), String>; | ||
|
||
/// Rename a dataset in the catalog. | ||
fn rename_dataset( | ||
&self, | ||
from: &DatasetIdentifier, | ||
to: &DatasetIdentifier, | ||
) -> Result<(), String>; | ||
|
||
/// Load a dataset from the catalog. | ||
fn load_dataset(&self, name: &DatasetIdentifier) -> Result<Dataset, String>; | ||
|
||
/// Invalidate cached table metadata from current catalog. | ||
fn invalidate_dataset(&self, identifier: &DatasetIdentifier) -> Result<(), String>; | ||
|
||
/// Register a dataset in the catalog. | ||
fn register_dataset(&self, identifier: &DatasetIdentifier) -> Result<Dataset, String>; | ||
|
||
/// Initialize the catalog. | ||
fn initialize(&self, name: &str, properties: &HashMap<&str, &str>) -> Result<(), String>; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// SPDX-FileCopyrightText: Copyright The Lance Authors | ||
|
||
use crate::catalog::namespace::Namespace; | ||
use std::fmt; | ||
use std::hash::{Hash, Hasher}; | ||
|
||
#[derive(Clone, Debug)] | ||
pub struct DatasetIdentifier { | ||
namespace: Namespace, | ||
name: String, | ||
} | ||
|
||
impl DatasetIdentifier { | ||
pub fn of(names: &[&str]) -> Self { | ||
assert!( | ||
!names.is_empty(), | ||
"Cannot create dataset identifier without a dataset name" | ||
); | ||
let namespace = Namespace::of(&names[..names.len() - 1]); | ||
let name = names[names.len() - 1].to_string(); | ||
Self { namespace, name } | ||
} | ||
|
||
pub fn of_namespace(namespace: Namespace, name: &str) -> Self { | ||
assert!(!name.is_empty(), "Invalid dataset name: null or empty"); | ||
Self { | ||
namespace, | ||
name: name.to_string(), | ||
} | ||
} | ||
|
||
pub fn parse(identifier: &str) -> Self { | ||
let parts: Vec<&str> = identifier.split('.').collect(); | ||
Self::of(&parts) | ||
} | ||
|
||
pub fn has_namespace(&self) -> bool { | ||
!self.namespace.is_empty() | ||
} | ||
|
||
pub fn namespace(&self) -> &Namespace { | ||
&self.namespace | ||
} | ||
|
||
pub fn name(&self) -> &str { | ||
&self.name | ||
} | ||
|
||
pub fn to_lowercase(&self) -> Self { | ||
let new_levels: Vec<String> = self | ||
.namespace | ||
.levels() | ||
.iter() | ||
.map(|s| s.to_lowercase()) | ||
.collect(); | ||
let new_name = self.name.to_lowercase(); | ||
Self::of_namespace( | ||
Namespace::of(&new_levels.iter().map(String::as_str).collect::<Vec<&str>>()), | ||
&new_name, | ||
) | ||
} | ||
} | ||
|
||
impl PartialEq for DatasetIdentifier { | ||
fn eq(&self, other: &Self) -> bool { | ||
self.namespace == other.namespace && self.name == other.name | ||
} | ||
} | ||
|
||
impl Eq for DatasetIdentifier {} | ||
|
||
impl Hash for DatasetIdentifier { | ||
fn hash<H: Hasher>(&self, state: &mut H) { | ||
self.namespace.hash(state); | ||
self.name.hash(state); | ||
} | ||
} | ||
|
||
impl fmt::Display for DatasetIdentifier { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
if self.has_namespace() { | ||
write!(f, "{}.{}", self.namespace, self.name) | ||
} else { | ||
write!(f, "{}", self.name) | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;
    use std::hash::DefaultHasher;

    /// Fully qualified identifier text shared by most tests.
    const QUALIFIED: &str = "namespace1.namespace2.dataset";

    /// Hashes `value` with a fresh `DefaultHasher`.
    fn hash_of<T: Hash>(value: &T) -> u64 {
        let mut hasher = DefaultHasher::new();
        value.hash(&mut hasher);
        hasher.finish()
    }

    #[test]
    fn test_dataset_identifier_of() {
        let identifier = DatasetIdentifier::of(&["namespace1", "namespace2", "dataset"]);
        assert_eq!(
            identifier.namespace().levels(),
            ["namespace1", "namespace2"]
        );
        assert_eq!(identifier.name(), "dataset");
    }

    #[test]
    fn test_dataset_identifier_of_namespace() {
        let expected_ns = Namespace::of(&["namespace1", "namespace2"]);
        let identifier = DatasetIdentifier::of_namespace(expected_ns.clone(), "dataset");
        assert_eq!(identifier.namespace(), &expected_ns);
        assert_eq!(identifier.name(), "dataset");
    }

    #[test]
    fn test_dataset_identifier_parse() {
        let identifier = DatasetIdentifier::parse(QUALIFIED);
        assert_eq!(
            identifier.namespace().levels(),
            ["namespace1", "namespace2"]
        );
        assert_eq!(identifier.name(), "dataset");
    }

    #[test]
    fn test_dataset_identifier_has_namespace() {
        let qualified = DatasetIdentifier::parse(QUALIFIED);
        assert!(qualified.has_namespace());

        let unqualified = DatasetIdentifier::of(&["dataset"]);
        assert!(!unqualified.has_namespace());
    }

    #[test]
    fn test_dataset_identifier_to_lowercase() {
        let lowered = DatasetIdentifier::parse("Namespace1.Namespace2.Dataset").to_lowercase();
        assert_eq!(lowered.namespace().levels(), ["namespace1", "namespace2"]);
        assert_eq!(lowered.name(), "dataset");
    }

    #[test]
    fn test_dataset_identifier_equality() {
        let first = DatasetIdentifier::parse(QUALIFIED);
        let same = DatasetIdentifier::parse(QUALIFIED);
        let different = DatasetIdentifier::parse("namespace1.namespace2.other_dataset");
        assert_eq!(first, same);
        assert_ne!(first, different);
    }

    #[test]
    fn test_dataset_identifier_hash() {
        let first = DatasetIdentifier::parse(QUALIFIED);
        let same = DatasetIdentifier::parse(QUALIFIED);
        assert_eq!(hash_of(&first), hash_of(&same));
    }

    #[test]
    fn test_dataset_identifier_display() {
        let qualified = DatasetIdentifier::parse(QUALIFIED);
        assert_eq!(qualified.to_string(), "namespace1.namespace2.dataset");

        let unqualified = DatasetIdentifier::of(&["dataset"]);
        assert_eq!(unqualified.to_string(), "dataset");
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// SPDX-FileCopyrightText: Copyright The Lance Authors | ||
|
||
use std::fmt; | ||
use std::hash::{Hash, Hasher}; | ||
|
||
/// An ordered sequence of namespace levels, e.g. `["level1", "level2"]`.
#[derive(Clone)]
pub struct Namespace {
    /// The levels, in the order they were passed to [`Namespace::of`].
    levels: Vec<String>,
}

impl Namespace {
    /// Returns a namespace with no levels.
    pub fn empty() -> Self {
        Self { levels: Vec::new() }
    }

    /// Builds a namespace from `levels`, preserving their order.
    ///
    /// # Panics
    ///
    /// Panics if any level contains the NUL character (`'\0'`), whether it is
    /// the entire level or embedded inside a longer string.
    pub fn of(levels: &[&str]) -> Self {
        assert!(
            // `contains` rather than `==`: a NUL anywhere in a level is invalid,
            // not only a level that consists of exactly one NUL.
            levels.iter().all(|level| !level.contains('\0')),
            "Cannot create a namespace with the null-byte character"
        );
        Self {
            levels: levels.iter().map(|&s| s.to_string()).collect(),
        }
    }

    /// Returns all levels as a slice.
    pub fn levels(&self) -> &[String] {
        &self.levels
    }

    /// Returns the level at index `pos`.
    ///
    /// # Panics
    ///
    /// Panics if `pos` is out of bounds.
    pub fn level(&self, pos: usize) -> &str {
        &self.levels[pos]
    }

    /// Returns `true` when the namespace has no levels.
    pub fn is_empty(&self) -> bool {
        self.levels.is_empty()
    }

    /// Returns the number of levels.
    pub fn length(&self) -> usize {
        self.levels.len()
    }
}
|
||
impl PartialEq for Namespace { | ||
fn eq(&self, other: &Self) -> bool { | ||
self.levels == other.levels | ||
} | ||
} | ||
|
||
impl Eq for Namespace {} | ||
|
||
impl Hash for Namespace { | ||
fn hash<H: Hasher>(&self, state: &mut H) { | ||
self.levels.hash(state); | ||
} | ||
} | ||
|
||
impl fmt::Display for Namespace { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
write!(f, "{}", self.levels.join(".")) | ||
} | ||
} | ||
|
||
impl fmt::Debug for Namespace { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
f.debug_struct("Namespace") | ||
.field("levels", &self.levels) | ||
.finish() | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;
    use std::hash::DefaultHasher;

    /// Two-level namespace used as the common fixture.
    fn two_levels() -> Namespace {
        Namespace::of(&["level1", "level2"])
    }

    /// Hashes `value` with a fresh `DefaultHasher`.
    fn hash_of<T: Hash>(value: &T) -> u64 {
        let mut hasher = DefaultHasher::new();
        value.hash(&mut hasher);
        hasher.finish()
    }

    #[test]
    fn test_empty_namespace() {
        let empty = Namespace::empty();
        assert!(empty.is_empty());
        assert_eq!(empty.length(), 0);
        assert_eq!(empty.levels().len(), 0);
    }

    #[test]
    fn test_namespace_of() {
        let namespace = two_levels();
        assert!(!namespace.is_empty());
        assert_eq!(namespace.length(), 2);
        assert_eq!(namespace.level(0), "level1");
        assert_eq!(namespace.level(1), "level2");
    }

    #[test]
    #[should_panic(expected = "Cannot create a namespace with the null-byte character")]
    fn test_namespace_of_with_null_byte() {
        Namespace::of(&["level1", "\0"]);
    }

    #[test]
    fn test_namespace_levels() {
        let namespace = two_levels();
        assert_eq!(namespace.levels(), ["level1", "level2"]);
    }

    #[test]
    fn test_namespace_equality() {
        let first = two_levels();
        let same = two_levels();
        let different = Namespace::of(&["level1", "level3"]);
        assert_eq!(first, same);
        assert_ne!(first, different);
    }

    #[test]
    fn test_namespace_hash() {
        assert_eq!(hash_of(&two_levels()), hash_of(&two_levels()));
    }

    #[test]
    fn test_namespace_display() {
        assert_eq!(two_levels().to_string(), "level1.level2");
    }

    #[test]
    fn test_namespace_debug() {
        let namespace = two_levels();
        assert_eq!(
            format!("{namespace:?}"),
            "Namespace { levels: [\"level1\", \"level2\"] }"
        );
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters