From ca08cc25bcb2ff386f4965f20c93a120c634e014 Mon Sep 17 00:00:00 2001 From: Joe Hellerstein Date: Fri, 20 Sep 2024 17:29:58 -0700 Subject: [PATCH] TupleSet storage now working. On to Column Storage! --- hydroflow/tests/surface_lattice_bimorphism.rs | 23 +++- .../surface_lattice_generalized_hash_trie.rs | 7 +- lattices/src/ght.rs | 55 ++++---- lattices/src/ght_lattice.rs | 47 ++++--- lattices/src/ght_lazy.rs | 104 ++++++++------- lattices/src/ght_test.rs | 55 ++++---- variadics/src/hash_set.rs | 120 +++++++++++++++++- 7 files changed, 289 insertions(+), 122 deletions(-) diff --git a/hydroflow/tests/surface_lattice_bimorphism.rs b/hydroflow/tests/surface_lattice_bimorphism.rs index a8316eb57a90..0887b4917d2d 100644 --- a/hydroflow/tests/surface_lattice_bimorphism.rs +++ b/hydroflow/tests/surface_lattice_bimorphism.rs @@ -2,12 +2,13 @@ use std::collections::{HashMap, HashSet}; use hydroflow::util::collect_ready; use hydroflow::{assert_graphvis_snapshots, hydroflow_syntax}; -use lattices::ght::GeneralizedHashTrieNode; +use lattices::ght::{GeneralizedHashTrieNode, GhtInner}; use lattices::ght_lattice::{DeepJoinLatticeBimorphism, GhtBimorphism}; use lattices::map_union::{KeyedBimorphism, MapUnionHashMap, MapUnionSingletonMap}; use lattices::set_union::{CartesianProductBimorphism, SetUnionHashSet, SetUnionSingletonSet}; use lattices::GhtType; use multiplatform_test::multiplatform_test; +use variadics::hash_set::VariadicHashSet; use variadics::{var_expr, CloneVariadic}; #[multiplatform_test] @@ -146,14 +147,28 @@ fn test_ght_join_bimorphism() { // type MyGhtATrie = ::Trie; // type MyGhtBTrie = ::Trie; type MyGhtATrie = GhtType!(u32, u64, u16 => &'static str); + let ght_a = MyGhtATrie::default(); type MyGhtBTrie = GhtType!(u32, u64, u16 => &'static str); type Output = variadics::var_type!(u32, u64, u16, &'static str, &'static str); - type MyNodeBim = - <(MyGhtATrie, MyGhtBTrie) as DeepJoinLatticeBimorphism>::DeepJoinLatticeBimorphism; + type MyNodeBim = <(MyGhtATrie, MyGhtBTrie) as DeepJoinLatticeBimorphism< + VariadicHashSet, + >>::DeepJoinLatticeBimorphism; + let me_node_bim = MyNodeBim::default(); type MyBim = GhtBimorphism; - // let me_bim = MyBim::default(); + let me_bim = MyBim::default(); + + let mut hf = hydroflow_syntax! { + lhs = source_iter_delta([ + var_expr!(123, 2, 5, "hello"), + var_expr!(50, 1, 1, "hi"), + var_expr!(5, 1, 7, "hi"), + var_expr!(5, 1, 7, "bye"), + ]) + -> map(|row| MyGhtATrie::new_from([row])) + -> state::<'tick, MyGhtATrie>(); + }; let mut hf = hydroflow_syntax! { lhs = source_iter_delta([ diff --git a/hydroflow/tests/surface_lattice_generalized_hash_trie.rs b/hydroflow/tests/surface_lattice_generalized_hash_trie.rs index ad21059d13c9..c03a28a44913 100644 --- a/hydroflow/tests/surface_lattice_generalized_hash_trie.rs +++ b/hydroflow/tests/surface_lattice_generalized_hash_trie.rs @@ -3,7 +3,8 @@ use hydroflow::lattices::ght::GeneralizedHashTrieNode; use hydroflow::lattices::ght_lattice::{DeepJoinLatticeBimorphism, GhtBimorphism}; use hydroflow::lattices::GhtType; use hydroflow::util::collect_ready; -use hydroflow::variadics::{var_expr, var_type}; // Import the Insert trait +use hydroflow::variadics::{var_expr, var_type}; +use variadics::hash_set::VariadicHashSet; // Import the Insert trait #[test] fn test_basic() { @@ -45,7 +46,9 @@ fn test_join() { ]; let s = vec![var_expr!(1, 10), var_expr!(5, 50)]; - type MyNodeBim = <(MyGht, MyGht) as DeepJoinLatticeBimorphism>::DeepJoinLatticeBimorphism; + type MyNodeBim = <(MyGht, MyGht) as DeepJoinLatticeBimorphism< + VariadicHashSet, + >>::DeepJoinLatticeBimorphism; type MyBim = GhtBimorphism; let mut df = hydroflow_syntax! { diff --git a/lattices/src/ght.rs b/lattices/src/ght.rs index c7c15cd1745e..9be32396de0c 100644 --- a/lattices/src/ght.rs +++ b/lattices/src/ght.rs @@ -4,6 +4,7 @@ use std::hash::Hash; use std::marker::PhantomData; use sealed::sealed; +use variadics::hash_set::VariadicHashSet; use variadics::{ var_args, var_type, PartialEqVariadic, RefVariadic, Split, SplitBySuffix, VariadicExt, }; @@ -23,7 +24,7 @@ pub trait GeneralizedHashTrieNode: Default { /// This type is the same in all nodes of the trie. type ValType: VariadicExt + Eq + Hash + Clone; /// The type that holds the data in the leaves - type Storage: TupleSet + Default; + type Storage: TupleSet + Default + IntoIterator; /// SuffixSchema variadic: the suffix of the schema *from this node of the trie /// downward*. The first entry in this variadic is of type Head. @@ -65,10 +66,10 @@ pub trait GeneralizedHashTrieNode: Default { // ) -> impl Iterator::AsRefVar<'_>>; /// Bimorphism for joining on full tuple keys (all GhtInner keys) in the trie - type DeepJoin + type DeepJoin where Other: GeneralizedHashTrieNode, - (Self, Other): DeepJoinLatticeBimorphism; + (Self, Other): DeepJoinLatticeBimorphism; // /// For Inner nodes only, this is the type of the Child node // type ChildNode: GeneralizedHashTrieNode; @@ -171,10 +172,10 @@ where // .flat_map(|(_k, vs)| vs.recursive_iter_keys().map(move |v| v)) // } - type DeepJoin = <(Self, Other) as DeepJoinLatticeBimorphism>::DeepJoinLatticeBimorphism + type DeepJoin = <(Self, Other) as DeepJoinLatticeBimorphism>::DeepJoinLatticeBimorphism where Other: GeneralizedHashTrieNode, - (Self, Other): DeepJoinLatticeBimorphism; + (Self, Other): DeepJoinLatticeBimorphism; fn find_containing_leaf( &self, @@ -231,28 +232,33 @@ pub trait TupleSet { fn len(&self) -> usize; /// Return true if empty - fn is_empty(&self) -> bool; + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn drain(&mut self) -> impl Iterator; /// Check for containment - fn contains(&self, value: &Self::Schema) -> bool; + fn contains(&self, value: ::AsRefVar<'_>) -> bool; } -impl TupleSet for HashSet +impl TupleSet for VariadicHashSet where Schema: 'static + Eq + Hash + PartialEqVariadic, + for<'a> ::AsRefVar<'a>: Hash, { type Schema = Schema; fn insert(&mut self, element: Self::Schema) -> bool { - HashSet::insert(self, element) + self.insert(element) } fn iter(&self) -> impl Iterator::AsRefVar<'_>> { - self.iter().map(Self::Schema::as_ref_var) + self.iter() } fn len(&self) -> usize { - HashSet::len(self) + self.len() } /// Return true if empty @@ -260,9 +266,12 @@ where self.len() == 0 } - fn contains(&self, value: &Self::Schema) -> bool { - let t = value; - HashSet::contains(self, &t) + fn drain(&mut self) -> impl Iterator { + self.drain() + } + + fn contains(&self, value: ::AsRefVar<'_>) -> bool { + self.get(value).is_some() } } @@ -308,7 +317,7 @@ where var_type!(ValHead, ...ValRest): Clone + Eq + Hash + PartialEqVariadic, >::Prefix: Eq + Hash + Clone, // for<'a> Schema::AsRefVar<'a>: PartialEq, - Storage: TupleSet + Default, + Storage: TupleSet + Default + IntoIterator, { type Schema = Schema; type SuffixSchema = var_type!(ValHead, ...ValRest); @@ -346,10 +355,10 @@ where self.elements.iter() // .map(Schema::as_ref_var) } - type DeepJoin = <(Self, Other) as DeepJoinLatticeBimorphism>::DeepJoinLatticeBimorphism + type DeepJoin = <(Self, Other) as DeepJoinLatticeBimorphism>::DeepJoinLatticeBimorphism where Other: GeneralizedHashTrieNode, - (Self, Other): DeepJoinLatticeBimorphism; + (Self, Other): DeepJoinLatticeBimorphism; fn find_containing_leaf( &self, @@ -378,7 +387,7 @@ where + Clone // + SplitBySuffix + PartialEqVariadic, - Storage: TupleSet + Default, + Storage: TupleSet + Default + IntoIterator, // ValHead: Clone + Eq + Hash, // var_type!(ValHead, ...ValRest): Clone + Eq + Hash + PartialEqVariadic, // >::Prefix: Eq + Hash + Clone, @@ -420,10 +429,10 @@ where self.elements.iter() //.map(Schema::as_ref_var) } - type DeepJoin = <(Self, Other) as DeepJoinLatticeBimorphism>::DeepJoinLatticeBimorphism + type DeepJoin = <(Self, Other) as DeepJoinLatticeBimorphism>::DeepJoinLatticeBimorphism where Other: GeneralizedHashTrieNode, - (Self, Other): DeepJoinLatticeBimorphism; + (Self, Other): DeepJoinLatticeBimorphism; fn find_containing_leaf( &self, @@ -776,17 +785,17 @@ macro_rules! GhtTypeWithSchema { // Empty key (Leaf) (() => $( $z:ty ),* => $schema:ty ) => ( - $crate::ght::GhtLeaf::<$schema, $crate::variadics::var_type!($( $z ),* ), HashSet<$schema> > + $crate::ght::GhtLeaf::<$schema, $crate::variadics::var_type!($( $z ),* ), $crate::variadics::hash_set::VariadicHashSet<$schema> > ); // Singleton key & Empty val (Inner over Leaf) ($a:ty => () => $schema:ty ) => ( - $crate::ght::GhtInner::<$a, $crate::ght::GhtLeaf::<$schema, (), HashSet<$schema> >> + $crate::ght::GhtInner::<$a, $crate::ght::GhtLeaf::<$schema, (), $crate::variadics::hash_set::VariadicHashSet<$schema> >> ); // Singleton key (Inner over Leaf) ($a:ty => $( $z:ty ),* => $schema:ty ) => ( - $crate::ght::GhtInner::<$a, $crate::ght::GhtLeaf::<$schema, $crate::variadics::var_type!($( $z ),*), HashSet<$schema> >> + $crate::ght::GhtInner::<$a, $crate::ght::GhtLeaf::<$schema, $crate::variadics::var_type!($( $z ),*), $crate::variadics::hash_set::VariadicHashSet<$schema> >> ); // Recursive case with empty val diff --git a/lattices/src/ght_lattice.rs b/lattices/src/ght_lattice.rs index 133a5b80659c..c3e145b4dc46 100644 --- a/lattices/src/ght_lattice.rs +++ b/lattices/src/ght_lattice.rs @@ -25,7 +25,7 @@ use crate::{IsBot, IsTop, LatticeBimorphism, LatticeOrd, Merge}; impl Merge> for GhtInner where - Node: GeneralizedHashTrieNode + Merge + Clone, + Node: GeneralizedHashTrieNode + Merge, Self: GeneralizedHashTrieNode, Head: Hash + Eq + Clone, { @@ -47,13 +47,13 @@ where } } -impl Merge> - for GhtLeaf +impl Merge> + for GhtLeaf where Schema: Eq + Hash, - Storage: TupleSet + Extend + Iterator, + Storage: TupleSet + Extend + IntoIterator, { - fn merge(&mut self, other: GhtLeaf) -> bool { + fn merge(&mut self, other: GhtLeaf) -> bool { let old_len = self.elements.len(); self.elements.extend(other.elements); self.elements.len() > old_len @@ -472,48 +472,47 @@ where } /// bimorphism trait for equijoin on full tuple (keys in all GhtInner nodes) -pub trait DeepJoinLatticeBimorphism { +pub trait DeepJoinLatticeBimorphism { /// bimorphism type for equijoin on full tuple (keys in all GhtInner nodes) type DeepJoinLatticeBimorphism; } /// bimorphism implementation for equijoin on full tuple (keys in all GhtInner nodes) -impl DeepJoinLatticeBimorphism +impl DeepJoinLatticeBimorphism for (GhtInner, GhtInner) where Head: 'static + Hash + Eq + Clone, NodeA: 'static + GeneralizedHashTrieNode, NodeB: 'static + GeneralizedHashTrieNode, - (NodeA, NodeB): DeepJoinLatticeBimorphism, + (NodeA, NodeB): DeepJoinLatticeBimorphism, + Storage: TupleSet, { type DeepJoinLatticeBimorphism = GhtNodeKeyedBimorphism< - <(NodeA, NodeB) as DeepJoinLatticeBimorphism>::DeepJoinLatticeBimorphism, + <(NodeA, NodeB) as DeepJoinLatticeBimorphism>::DeepJoinLatticeBimorphism, >; } -impl DeepJoinLatticeBimorphism +impl + DeepJoinLatticeBimorphism for ( - GhtLeaf, - GhtLeaf, + GhtLeaf, + GhtLeaf, ) where - SchemaA: 'static - + VariadicExt = SchemaA> - + Eq - + Hash - + SplitBySuffix, // + AsRefVariadicPartialEq - SuffixSchemaA: 'static + VariadicExt + Eq + Hash, // + AsRefVariadicPartialEq - SchemaB: 'static + VariadicExt + Eq + Hash + SplitBySuffix, /* + AsRefVariadicPartialEq */ - SuffixSchemaB: 'static + VariadicExt + Eq + Hash, // + AsRefVariadicPartialEq + SchemaA: 'static + VariadicExt + Eq + Hash + SplitBySuffix, /* + AsRefVariadicPartialEq */ + ValTypeA: 'static + VariadicExt + Eq + Hash, // + AsRefVariadicPartialEq + SchemaB: 'static + VariadicExt + Eq + Hash + SplitBySuffix, /* + AsRefVariadicPartialEq */ + ValTypeB: 'static + VariadicExt + Eq + Hash, // + AsRefVariadicPartialEq StorageA: TupleSet, StorageB: TupleSet, + StorageOut: TupleSet, for<'x> SchemaA::AsRefVar<'x>: CloneVariadic, for<'x> SchemaB::AsRefVar<'x>: CloneVariadic, - var_type!(...SchemaA, ...SuffixSchemaB): Eq + Hash, + var_type!(...SchemaA, ...ValTypeB): Eq + Hash, { type DeepJoinLatticeBimorphism = GhtValTypeProductBimorphism< GhtLeaf< - var_type!(...SchemaA, ...SuffixSchemaB), - var_type!(...SuffixSchemaA, ...SuffixSchemaB), - StorageA, + var_type!(...SchemaA, ...ValTypeB), + var_type!(...ValTypeA, ...ValTypeB), + StorageOut, >, >; } diff --git a/lattices/src/ght_lazy.rs b/lattices/src/ght_lazy.rs index 9cd6a113179c..4c60de02f1cc 100644 --- a/lattices/src/ght_lazy.rs +++ b/lattices/src/ght_lazy.rs @@ -4,7 +4,7 @@ use std::marker::PhantomData; use sealed::sealed; use variadics::{var_expr, var_type, PartialEqVariadic, Split, SplitBySuffix, VariadicExt}; -use crate::ght::{GeneralizedHashTrieNode, GhtGet, GhtInner, GhtLeaf, GhtTakeLeaf}; +use crate::ght::{GeneralizedHashTrieNode, GhtGet, GhtInner, GhtLeaf, GhtTakeLeaf, TupleSet}; // Remaining Questions // 1. Should the first element in the forest be a single GhtLeaf? @@ -69,7 +69,8 @@ where } #[sealed] -impl ColumnLazyTrieNode for GhtLeaf +impl ColumnLazyTrieNode + for GhtLeaf where Head: 'static + Clone + Hash + Eq, Rest: 'static + Clone + Hash + Eq + VariadicExt, @@ -83,8 +84,11 @@ where Schema: SplitBySuffix, >::Prefix: Eq + Hash + Clone, >::Prefix: Eq + Hash + Clone, - GhtLeaf: GeneralizedHashTrieNode, - GhtInner>: GeneralizedHashTrieNode, + Storage: TupleSet + + Default // + Iterator + + IntoIterator, + GhtLeaf: GeneralizedHashTrieNode, + GhtInner>: GeneralizedHashTrieNode, { fn into_iter(self) -> Option> { Some(self.elements.into_iter()) @@ -94,7 +98,7 @@ where Some(self.elements.drain()) } // Node::Schema: SplitBySuffix - type Force = GhtInner>; + type Force = GhtInner>; fn force(mut self) -> Option { let mut retval = Self::Force::default(); self.forced = true; @@ -106,7 +110,7 @@ where Some(retval) } - fn force_drain(&mut self) -> Option>> { + fn force_drain(&mut self) -> Option>> { let mut retval = Self::Force::default(); self.forced = true; for row in self.drain().unwrap() { @@ -176,17 +180,18 @@ pub trait ColtNodeTail: ColtNode { } #[sealed] -impl<'a, Rest, Schema, SuffixSchema> ColtNode for var_type!(&'a mut GhtLeaf, ...Rest) +impl<'a, Rest, Schema, SuffixSchema, Storage> ColtNode for var_type!(&'a mut GhtLeaf, ...Rest) where Rest: ColtNodeTail< - as ColumnLazyTrieNode>::Force, + as ColumnLazyTrieNode>::Force, Schema = Schema, // SuffixSchema = SuffixSchema, >, ::SuffixSchema: 'a, - GhtLeaf: ColumnLazyTrieNode, + GhtLeaf: ColumnLazyTrieNode, Schema: Clone + Hash + Eq + VariadicExt, SuffixSchema: Clone + Hash + Eq + VariadicExt, + Storage: TupleSet, { type Schema = Schema; type Head = Rest::Head; @@ -201,17 +206,18 @@ where } } #[sealed] -impl<'a, Rest, Schema, SuffixSchema, T> ColtNodeTail for var_type!(&'a mut GhtLeaf, ...Rest) +impl<'a, Rest, Schema, SuffixSchema, T, Storage> ColtNodeTail for var_type!(&'a mut GhtLeaf, ...Rest) where Rest: ColtNodeTail< - as ColumnLazyTrieNode>::Force, + as ColumnLazyTrieNode>::Force, Schema = Schema, // SuffixSchema = SuffixSchema, >, ::SuffixSchema: 'a, - GhtLeaf: ColumnLazyTrieNode, + GhtLeaf: ColumnLazyTrieNode, Schema: Clone + Hash + Eq + VariadicExt, SuffixSchema: Clone + Hash + Eq + VariadicExt, + Storage: TupleSet, { fn merge(&mut self, _inner_to_merge: T) { panic!(); @@ -265,32 +271,34 @@ where } #[sealed] -impl<'a, Head, Rest, Schema, ValType> ColtNode for var_type!(&'a mut GhtInner>, ...Rest) +impl<'a, Head, Rest, Schema, ValType, Storage> ColtNode for var_type!(&'a mut GhtInner>, ...Rest) where Rest: ColtNode, Head: Eq + Hash + Clone, Schema: Eq + Hash + Clone + PartialEqVariadic, ValType: Eq + Hash + Clone + PartialEqVariadic, - GhtLeaf: GeneralizedHashTrieNode, + Storage: TupleSet, + GhtLeaf: GeneralizedHashTrieNode, Schema: 'static + Eq + VariadicExt + Hash + Clone + SplitBySuffix + PartialEqVariadic, >::Prefix: Eq + Hash + Clone, - GhtInner>: GeneralizedHashTrieNode + GhtGet, + GhtInner>: + GeneralizedHashTrieNode + GhtGet, // Rest: ColtNode, // Head: Eq + Hash + Clone, // Head2: Eq + Hash + Clone, // Node: GeneralizedHashTrieNode, - GhtInner>: GeneralizedHashTrieNode< + GhtInner>: GeneralizedHashTrieNode< Head = Rest::Head, // SuffixSchema = Rest::SuffixSchema, Schema = Rest::Schema, >, - GhtLeaf: GeneralizedHashTrieNode + GhtGet, + GhtLeaf: GeneralizedHashTrieNode + GhtGet, { type Schema = Rest::Schema; type Head = Rest::Head; type SuffixSchema = Rest::SuffixSchema; // type Get = Rest::Get; // Option<&'a as GhtGet>::Get>, - type Get = var_type!(&'a mut GhtLeaf, ...Rest::Get); + type Get = var_type!(&'a mut GhtLeaf, ...Rest::Get); fn get(self, head: &Self::Head) -> Self::Get { let (first, rest) = self; @@ -299,20 +307,22 @@ where } } #[sealed] -impl<'a, Head, Rest, Schema, ValType> ColtNodeTail>> for var_type!(&'a mut GhtInner>, ...Rest) +impl<'a, Head, Rest, Schema, ValType, Storage> + ColtNodeTail>> for var_type!(&'a mut GhtInner>, ...Rest) where Rest: ColtNode, Head: Eq + Hash + Clone, Schema: Eq + Hash + Clone + PartialEqVariadic, ValType: Eq + Hash + Clone + PartialEqVariadic, - GhtLeaf: GeneralizedHashTrieNode, + Storage: TupleSet, + GhtLeaf: GeneralizedHashTrieNode, Schema: 'static + Eq + VariadicExt + Hash + Clone + SplitBySuffix + PartialEqVariadic, >::Prefix: Eq + Hash + Clone, - GhtInner>: GeneralizedHashTrieNode - + crate::Merge>> + GhtInner>: GeneralizedHashTrieNode + + crate::Merge>> + GhtGet, { - fn merge(&mut self, inner_to_merge: GhtInner>) { + fn merge(&mut self, inner_to_merge: GhtInner>) { // This shouldn't be none? IDK let (head, _rest) = self; crate::Merge::merge(*head, inner_to_merge); @@ -337,18 +347,20 @@ where } } #[sealed] -impl<'a, Head, Schema, ValType> ColtNodeTail>> - for var_type!(&'a mut GhtInner>) +impl<'a, Head, Schema, ValType, Storage> + ColtNodeTail>> + for var_type!(&'a mut GhtInner>) where - GhtInner>: GeneralizedHashTrieNode + GhtInner>: GeneralizedHashTrieNode + GhtGet - + crate::Merge>> + + crate::Merge>> + GhtGet, - GhtLeaf: GeneralizedHashTrieNode, + GhtLeaf: GeneralizedHashTrieNode, Head: Clone + Eq + Hash, Schema: Clone + Eq + Hash + VariadicExt, + Storage: TupleSet, { - fn merge(&mut self, inner_to_merge: GhtInner>) { + fn merge(&mut self, inner_to_merge: GhtInner>) { crate::Merge::merge(self.0, inner_to_merge); } } @@ -393,17 +405,19 @@ where impl GhtForest for var_type!(TrieFirst, TrieSecond, ...TrieRest) where TrieFirst: GeneralizedHashTrieNode + GhtTakeLeaf, - TrieSecond: GeneralizedHashTrieNode + GhtTakeLeaf, + TrieSecond: GeneralizedHashTrieNode + + GhtTakeLeaf, SearchKey: VariadicExt + Split + Clone, var_type!(TrieSecond, ...TrieRest): GhtForest, // GhtForestStruct: GhtForest, TrieFirst::Schema: PartialEqVariadic + SplitBySuffix + Eq + Hash + Clone, TrieSecond::Schema: PartialEqVariadic + SplitBySuffix + Eq + Hash + Clone, - Self: ForestFindLeaf, + Self: ForestFindLeaf, <::Reverse as VariadicExt>::Reverse: Eq + Hash + Clone, GhtLeaf< ::Schema, ::ValType, + TrieFirst::Storage, >: ColumnLazyTrieNode, { fn force<'a>(&mut self, search_key: SearchKey) -> bool { @@ -419,7 +433,7 @@ where // TrieFirst::ValType IS NOT the same as TrieSecond::ValType, // but the elements in the leaf are the same. // So we just need a new GhtLeaf with the right ValType. - let leaf = GhtLeaf:: { + let leaf = GhtLeaf:: { elements: leaf.elements, forced: false, _suffix_schema: PhantomData, @@ -459,35 +473,36 @@ where #[sealed] /// a trait for finding a matching leaf in the forest -pub trait ForestFindLeaf +pub trait ForestFindLeaf where Schema: Eq + Hash + VariadicExt + PartialEqVariadic, + Storage: TupleSet, { /// find a matching leaf in the forest - fn find_containing_leaf(&self, row: Schema::AsRefVar<'_>) -> Option<&'_ GhtLeaf>; + fn find_containing_leaf( + &self, + row: Schema::AsRefVar<'_>, + ) -> Option<&'_ GhtLeaf>; } #[sealed] -impl ForestFindLeaf<::Schema> for var_type!(TrieFirst, ...TrieRest) +impl ForestFindLeaf for var_type!(TrieFirst, ...TrieRest) where ::Schema: PartialEqVariadic, TrieFirst: GeneralizedHashTrieNode, - TrieRest: ForestFindLeaf<::Schema>, + TrieRest: ForestFindLeaf<::Schema, TrieFirst::Storage>, { fn find_containing_leaf( &self, row: <::Schema as VariadicExt>::AsRefVar<'_>, - ) -> Option<&'_ GhtLeaf<::Schema, ()>> { + ) -> Option<&'_ GhtLeaf> { let var_expr!(first, ...rest) = &self; if let Some(leaf) = first.find_containing_leaf(row) { // TODO!!!! unsafe { std::mem::transmute::< - &GhtLeaf< - ::Schema, - ::ValType, - >, - Option<&GhtLeaf<::Schema, ()>>, + &GhtLeaf, + Option<&GhtLeaf>, >(leaf) } } else { @@ -497,14 +512,15 @@ where } #[sealed] -impl ForestFindLeaf for var_type!() +impl ForestFindLeaf for var_type!() where Schema: Eq + Hash + VariadicExt + PartialEqVariadic, + Storage: TupleSet, { fn find_containing_leaf( &self, _row: ::AsRefVar<'_>, - ) -> Option<&'_ GhtLeaf> { + ) -> Option<&'_ GhtLeaf> { None } } diff --git a/lattices/src/ght_test.rs b/lattices/src/ght_test.rs index 7d9d94972a70..80ef78159739 100644 --- a/lattices/src/ght_test.rs +++ b/lattices/src/ght_test.rs @@ -3,6 +3,7 @@ mod test { use std::collections::HashSet; use std::io::{self, Write}; + use variadics::hash_set::VariadicHashSet; use variadics::{var_expr, var_type, VariadicExt}; use crate::ght::{GeneralizedHashTrieNode, GhtGet, GhtLeaf, GhtPrefixIter}; @@ -44,6 +45,7 @@ mod test { #[test] fn test_ght_node_type_macro() { type LilTrie = GhtType!(() => u32); + let _j = LilTrie::default(); let _l = LilTrie::new_from(vec![var_expr!(1)]); type LilTrie2 = GhtType!(() => u32, u64); @@ -191,7 +193,9 @@ mod test { .iter() .map(|&(a, b, c)| var_expr!(a, b, c)), ); - let leaf = GhtLeaf::::new_from(input.clone()); + let leaf = GhtLeaf::>::new_from( + input.clone(), + ); // let key = var_expr!(42u8).as_ref_var(); let key = (); // (var_expr!().as_ref_var();) let v: HashSet = leaf.prefix_iter(key).collect(); @@ -940,49 +944,51 @@ mod test { assert_eq!( // println!( // "found in trie {}", - ForestFindLeaf::::find_containing_leaf( - &forest, - var_expr!(1_u8, 1_u16, 1_u32, 1_u64).as_ref_var() + ForestFindLeaf::< + var_type!(u8, u16, u32, u64), + VariadicHashSet, + >::find_containing_leaf( + &forest, var_expr!(1_u8, 1_u16, 1_u32, 1_u64).as_ref_var() ) .unwrap() .iter_tuples() .next() .unwrap(), - var_expr!(1, 1, 1, 1) + var_expr!(1, 1, 1, 1).as_ref_var() ); assert_eq!( // println!( // "found in trie {}", - ForestFindLeaf::::find_containing_leaf( - &forest, - var_expr!(2, 2, 2, 2).as_ref_var() - ) + ForestFindLeaf::< + var_type!(u8, u16, u32, u64), + VariadicHashSet, + >::find_containing_leaf(&forest, var_expr!(2, 2, 2, 2).as_ref_var()) .unwrap() .iter_tuples() .next() .unwrap(), - var_expr!(2, 2, 2, 2) + var_expr!(2, 2, 2, 2).as_ref_var() ); assert_eq!( // println!( // "found in trie {}", - ForestFindLeaf::::find_containing_leaf( - &forest, - var_expr!(3, 3, 3, 3).as_ref_var() - ) + ForestFindLeaf::< + var_type!(u8, u16, u32, u64), + VariadicHashSet, + >::find_containing_leaf(&forest, var_expr!(3, 3, 3, 3).as_ref_var()) .unwrap() .iter_tuples() .next() .unwrap(), - var_expr!(3, 3, 3, 3) + var_expr!(3, 3, 3, 3).as_ref_var() ); assert!( // println!( // "found in trie {}", - ForestFindLeaf::::find_containing_leaf( - &forest, - var_expr!(4, 4, 4, 4).as_ref_var() - ) + ForestFindLeaf::< + var_type!(u8, u16, u32, u64), + VariadicHashSet, + >::find_containing_leaf(&forest, var_expr!(4, 4, 4, 4).as_ref_var()) .is_none() ); // println!("{:?}", forest.forest); @@ -1031,11 +1037,12 @@ mod test { forest.0.insert(var_expr!(true, 2, "hello", i)); } assert_eq!(forest.0.recursive_iter().count(), 1000009); - let leaf = - ForestFindLeaf::::find_containing_leaf( - &forest, - var_expr!(true, 2, "hello", 2).as_ref_var(), - ); + let leaf = ForestFindLeaf::< + var_type!(bool, usize, &'static str, i32), + VariadicHashSet, + >::find_containing_leaf( + &forest, var_expr!(true, 2, "hello", 2).as_ref_var() + ); println!("leaf size: {}", leaf.unwrap().elements.len()); // println!("forest.0: {:?}", forest.0); println!("forest.1: {:?}", forest.1 .0); diff --git a/variadics/src/hash_set.rs b/variadics/src/hash_set.rs index a996686c96ee..a74a28cb767f 100644 --- a/variadics/src/hash_set.rs +++ b/variadics/src/hash_set.rs @@ -1,14 +1,18 @@ +use std::fmt; use std::hash::{BuildHasher, Hash, Hasher, RandomState}; use hashbrown::hash_table::{Entry, HashTable}; use crate::{PartialEqVariadic, VariadicExt}; +#[derive(Clone)] pub struct VariadicHashSet { table: HashTable, hasher: S, } + impl VariadicHashSet { + /// Creates a new `VariadicHashSet` with a default hasher. pub fn new() -> Self { Self { table: HashTable::new(), @@ -16,6 +20,23 @@ impl VariadicHashSet { } } } + +impl Default for VariadicHashSet { + fn default() -> Self { + Self::new() + } +} + +impl fmt::Debug for VariadicHashSet +where + T: fmt::Debug + VariadicExt + PartialEqVariadic, + for<'a> T::AsRefVar<'a>: Hash + fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_set().entries(self.iter()).finish() + } +} + impl VariadicHashSet where T: VariadicExt + PartialEqVariadic, @@ -39,7 +60,7 @@ where let hash = Self::get_hash(&self.hasher, element.as_ref_var()); let entry = self.table.entry( hash, - |item| ::eq(&element, &item), + |item| ::eq(&element, item), |item| Self::get_hash(&self.hasher, item.as_ref_var()), ); match entry { @@ -55,10 +76,28 @@ where self.table.len() } + pub fn drain(&mut self) -> hashbrown::hash_table::Drain<'_, T> { + self.table.drain() + } + pub fn iter<'a>(&'a self) -> impl Iterator> { self.table.iter().map(|item| item.as_ref_var()) } } + +impl IntoIterator for VariadicHashSet +where + T: VariadicExt + PartialEqVariadic, +{ + type Item = T; + type IntoIter = hashbrown::hash_table::IntoIter; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.table.into_iter() + } +} + impl VariadicHashSet { pub fn with_hasher(hasher: S) -> Self { Self { @@ -73,3 +112,82 @@ impl VariadicHashSet { } } } + +// THIS CODE ADAPTED FROM hashbrown::HashMap +impl Extend for VariadicHashSet +where + K: Eq + Hash + PartialEqVariadic, + S: BuildHasher, + for<'a> K::AsRefVar<'a>: Hash, + // for<'a> S::Hasher: Fn(&'a K) -> u64, + // A: Allocator, +{ + // #[cfg_attr(feature = "inline-more", inline)] + fn extend>(&mut self, iter: T) { + // Keys may be already present or show multiple times in the iterator. + // Reserve the entire hint lower bound if the map is empty. + // Otherwise reserve half the hint (rounded up), so the map + // will only resize twice in the worst case. + let iter = iter.into_iter(); + let reserve = if self.len() == 0 { + iter.size_hint().0 + } else { + (iter.size_hint().0 + 1) / 2 + }; + // let hasher = self.hasher.build_hasher(); + // self.table.reserve(reserve, hasher); + iter.for_each(move |k| { + self.insert(k); + }); + } + + // #[inline] + // #[cfg(feature = "nightly")] + // fn extend_one(&mut self, (k, v): (K, V)) { + // self.insert(k, v); + // } + + // #[inline] + // #[cfg(feature = "nightly")] + // fn extend_reserve(&mut self, additional: usize) { + // // Keys may be already present or show multiple times in the iterator. + // // Reserve the entire hint lower bound if the map is empty. + // // Otherwise reserve half the hint (rounded up), so the map + // // will only resize twice in the worst case. + // let reserve = if self.is_empty() { + // additional + // } else { + // (additional + 1) / 2 + // }; + // self.reserve(reserve); + // } +} + +impl PartialEq for VariadicHashSet +where + T: Eq + Hash + PartialEqVariadic, + S: BuildHasher, + for<'a> T::AsRefVar<'a>: Hash, +{ + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter().all(|key| other.get(key).is_some()) + } +} + +impl FromIterator for VariadicHashSet +where + T: Eq + Hash + PartialEqVariadic, + S: BuildHasher + Default, + for<'a> T::AsRefVar<'a>: Hash, + // A: Default + Allocator, +{ + fn from_iter>(iter: I) -> Self { + let mut set = Self::with_hasher(Default::default()); + set.extend(iter); + set + } +}