diff --git a/cuckoofilter.go b/cuckoofilter.go
index 7878f1d..b03092f 100644
--- a/cuckoofilter.go
+++ b/cuckoofilter.go
@@ -6,9 +6,12 @@
 package cuckoo

 import (
+	"bytes"
 	"encoding/binary"
 	"errors"
 	"fmt"
+	"io"
+
+	"github.com/dgryski/go-metro"
 )
@@ -16,14 +19,14 @@ import (
 const kMaxCuckooCount uint = 500

 const (
-	//TableTypeSingle normal single table
+	// TableTypeSingle normal single table
 	TableTypeSingle = 0
-	//TableTypePacked packed table, use semi-sort to save 1 bit per item
+	// TableTypePacked packed table, use semi-sort to save 1 bit per item
 	TableTypePacked = 1
 )

 type table interface {
-	Init(tagsPerBucket, bitsPerTag, num uint)
+	Init(tagsPerBucket, bitsPerTag, num uint, initialBucketsHint []byte) error
 	NumBuckets() uint
 	FindTagInBuckets(i1, i2 uint, tag uint32) bool
 	DeleteTagFromBucket(i uint, tag uint32) bool
@@ -32,7 +35,7 @@ type table interface {
 	SizeInBytes() uint
 	Info() string
 	BitsPerItem() uint
-	Encode() []byte
+	Reader() (io.Reader, uint)
 	Decode([]byte) error
 	Reset()
 }
@@ -52,7 +55,9 @@ type victimCache struct {
 	used bool
 }

-//Filter cuckoo filter type struct
+const filterMetadataSize = 3*bytesPerUint32 + 1
+
+// Filter cuckoo filter type struct
 type Filter struct {
 	victim victimCache
 	numItems uint
@@ -75,7 +80,7 @@ func NewFilter(tagsPerBucket, bitsPerItem, maxNumKeys, tableType uint) *Filter {
 		numBuckets = 1
 	}
 	table := getTable(tableType).(table)
-	table.Init(tagsPerBucket, bitsPerItem, numBuckets)
+	_ = table.Init(tagsPerBucket, bitsPerItem, numBuckets, nil)
 	return &Filter{
 		table: table,
 	}
@@ -102,7 +107,7 @@ func (f *Filter) altIndex(index uint, tag uint32) uint {
 	return f.indexHash(uint32(index) ^ (tag * 0x5bd1e995))
 }

-//Size return num of items that filter store
+// Size return num of items that filter store
 func (f *Filter) Size() uint {
 	var c uint
 	if f.victim.used {
@@ -111,22 +116,22 @@ func (f *Filter) Size() uint {
 	return f.numItems + c
 }

-//LoadFactor return current filter's loadFactor
+// LoadFactor return current filter's loadFactor
 func (f *Filter) LoadFactor() float64 {
 	return 1.0 * float64(f.Size()) / float64(f.table.SizeInTags())
 }

-//SizeInBytes return bytes occupancy of filter's table
+// SizeInBytes return bytes occupancy of filter's table
 func (f *Filter) SizeInBytes() uint {
 	return f.table.SizeInBytes()
 }

-//BitsPerItem return bits occupancy per item of filter's table
+// BitsPerItem return bits occupancy per item of filter's table
 func (f *Filter) BitsPerItem() float64 {
 	return 8.0 * float64(f.table.SizeInBytes()) / float64(f.Size())
 }

-//Add add an item into filter, return false when filter is full
+// Add add an item into filter, return false when filter is full
 func (f *Filter) Add(item []byte) bool {
 	if f.victim.used {
 		return false
@@ -135,7 +140,7 @@ func (f *Filter) Add(item []byte) bool {
 	return f.addImpl(i, tag)
 }

-//AddUnique add an item into filter, return false when filter already contains it or filter is full
+// AddUnique add an item into filter, return false when filter already contains it or filter is full
 func (f *Filter) AddUnique(item []byte) bool {
 	if f.Contain(item) {
 		return false
@@ -169,7 +174,7 @@ func (f *Filter) addImpl(i uint, tag uint32) bool {
 	return true
 }

-//Contain return if filter contains an item
+// Contain return if filter contains an item
 func (f *Filter) Contain(key []byte) bool {
 	i1, tag := f.generateIndexTagHash(key)
 	i2 := f.altIndex(i1, tag)
@@ -182,7 +187,7 @@ func (f *Filter) Contain(key []byte) bool {
 	return false
 }

-//Delete delete item from filter, return false when item not exist
+// Delete delete item from filter, return false when item not exist
 func (f *Filter) Delete(key []byte) bool {
 	i1, tag := f.generateIndexTagHash(key)
 	i2 := f.altIndex(i1, tag)
@@ -238,7 +243,7 @@ func (f *Filter) FalsePositiveRate() float64 {
 	return float64(fp) / float64(rounds)
 }

-//Info return filter's detail info
+// Info return filter's detail info
 func (f *Filter) Info() string {
 	return fmt.Sprintf("CuckooFilter Status:\n"+
 		"\t\t%v\n"+
@@ -250,37 +255,51 @@ func (f *Filter) Info() string {
 }

 // Encode returns a byte slice representing a Cuckoo filter
-func (f *Filter) Encode() []byte {
-	var b [3][bytesPerUint32]byte
-	binary.LittleEndian.PutUint32(b[0][:], uint32(f.numItems))
-	binary.LittleEndian.PutUint32(b[1][:], uint32(f.victim.index))
-	binary.LittleEndian.PutUint32(b[2][:], f.victim.tag)
-
-	ret := append(b[0][:], b[1][:]...)
-	ret = append(ret, b[2][:]...)
+func (f *Filter) Encode() ([]byte, error) {
+	filterReader, filterSize := f.EncodeReader()
+	buf := make([]byte, filterSize)
+	if _, err := io.ReadFull(filterReader, buf); err != nil {
+		return nil, err
+	}
+	return buf, nil
+}
+
+// EncodeReader returns a reader representing a Cuckoo filter
+func (f *Filter) EncodeReader() (io.Reader, uint) {
+	var metadata [filterMetadataSize]byte
+
+	for i, n := range []uint32{uint32(f.numItems), uint32(f.victim.index), f.victim.tag} {
+		binary.LittleEndian.PutUint32(metadata[i*bytesPerUint32:], n)
+	}
+
+	victimUsed := byte(0)
 	if f.victim.used {
-		ret = append(ret, byte(1))
-	} else {
-		ret = append(ret, byte(0))
+		victimUsed = byte(1)
 	}
-	ret = append(ret, f.table.Encode()...)
+	metadata[bytesPerUint32*3] = victimUsed
+	tableReader, tableEncodedSize := f.table.Reader()
+	return io.MultiReader(bytes.NewReader(metadata[:]), tableReader), uint(len(metadata)) + tableEncodedSize
+}

-	return ret
+// Decode returns a Cuckoo Filter using a copy of the provided byte slice.
+func Decode(b []byte) (*Filter, error) {
+	copiedBytes := make([]byte, len(b))
+	copy(copiedBytes, b)
+	return DecodeFrom(copiedBytes)
 }

-// Decode returns a Cuckoo Filter from a byte slice
-func Decode(bytes []byte) (*Filter, error) {
-	if len(bytes) < 20 {
+// DecodeFrom returns a Cuckoo Filter using the exact provided byte slice (no copy).
+func DecodeFrom(b []byte) (*Filter, error) {
+	if len(b) < 20 {
 		return nil, errors.New("unexpected bytes length")
 	}
-	numItems := uint(binary.LittleEndian.Uint32(bytes[0:4]))
-	curIndex := uint(binary.LittleEndian.Uint32(bytes[4:8]))
-	curTag := binary.LittleEndian.Uint32(bytes[8:12])
-	used := bytes[12] == byte(1)
-	tableType := uint(bytes[13])
+	numItems := uint(binary.LittleEndian.Uint32(b[0*bytesPerUint32:]))
+	curIndex := uint(binary.LittleEndian.Uint32(b[1*bytesPerUint32:]))
+	curTag := binary.LittleEndian.Uint32(b[2*bytesPerUint32:])
+	used := b[12] == byte(1)
+	tableType := uint(b[13])
 	table := getTable(tableType).(table)
-	err := table.Decode(bytes[13:])
-	if err != nil {
+	if err := table.Decode(b[13:]); err != nil {
 		return nil, err
 	}
 	return &Filter{
diff --git a/cuckoofilter_test.go b/cuckoofilter_test.go
index 774dff0..b94a317 100644
--- a/cuckoofilter_test.go
+++ b/cuckoofilter_test.go
@@ -6,6 +6,7 @@
 package cuckoo

 import (
+	"bytes"
 	"crypto/rand"
 	"fmt"
 	"io"
@@ -15,9 +16,11 @@ import (
 const size = 100000

-var testBucketSize = []uint{2, 4, 8}
-var testFingerprintSize = []uint{2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 16, 17, 23, 31, 32}
-var testTableType = []uint{TableTypeSingle, TableTypePacked}
+var (
+	testBucketSize = []uint{2, 4, 8}
+	testFingerprintSize = []uint{2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 16, 17, 23, 31, 32}
+	testTableType = []uint{TableTypeSingle, TableTypePacked}
+)

 func TestFilter(t *testing.T) {
 	var insertNum uint = 50000
@@ -33,7 +36,7 @@ func TestFilter(t *testing.T) {
 				continue
 			}
 			cf := NewFilter(b, f, 8190, table)
-			//fmt.Println(cf.Info())
+			// fmt.Println(cf.Info())
 			a := make([][]byte, 0)
 			for i := uint(0); i < insertNum; i++ {
 				_, _ = io.ReadFull(rand.Reader, hash[:])
@@ -45,12 +48,48 @@ func TestFilter(t *testing.T) {
 			}

 			count := cf.Size()
-
 			if count != uint(len(a)) {
 				t.Errorf("Expected count = %d, instead count = %d, b %v f %v", uint(len(a)), count, b, f)
 				return
 			}

+			encodedBytes, err := cf.Encode()
+			if err != nil {
+				t.Fatalf("err %v", err)
+			}
+			if len(encodedBytes) != cap(encodedBytes) {
+				t.Fatalf("len(%d) != cap(%d)", len(encodedBytes), cap(encodedBytes))
+			}
+			ncf, err := Decode(encodedBytes)
+			if err != nil || !reflect.DeepEqual(cf, ncf) {
+				t.Errorf("Expected equal, err %v", err)
+				return
+			}
+
+			encodedBytes, err = cf.Encode()
+			if err != nil {
+				t.Fatalf("err %v", err)
+			}
+			ncf, err = DecodeFrom(encodedBytes)
+			if err != nil || !reflect.DeepEqual(cf, ncf) {
+				t.Errorf("Expected equal, err %v", err)
+				return
+			}
+
+			filterReader, _ := cf.EncodeReader()
+			bytesFromReader, err := io.ReadAll(filterReader)
+			if err != nil {
+				t.Fatalf("Error reading from reader: %v", err)
+			}
+			if !bytes.Equal(bytesFromReader, encodedBytes) {
+				t.Fatalf("Expected to be equal")
+			}
+
+			fmt.Println(cf.Info())
+			cf.BitsPerItem()
+			cf.SizeInBytes()
+			cf.LoadFactor()
+
 			for _, v := range a {
 				if !cf.Contain(v) {
 					t.Errorf("Expected contain, instead not contain, b %v f %v table type %v", b, f, table)
@@ -65,22 +104,10 @@ func TestFilter(t *testing.T) {
 				return
 			}

-			bytes := cf.Encode()
-			ncf, err := Decode(bytes)
-			if err != nil || !reflect.DeepEqual(cf, ncf) {
-				t.Errorf("Expected epual, err %v", err)
-				return
-			}
-
-			cf.Info()
-			cf.BitsPerItem()
-			cf.SizeInBytes()
-			cf.LoadFactor()
 			fmt.Printf("Filter bucketSize %v fingerprintSize %v tableType %v falsePositive Rate %v \n",
 				b, f, table, cf.FalsePositiveRate())
 		}
 	}
 }
-
 }

 func BenchmarkFilterSingle_Reset(b *testing.B) {
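For reference, here is a minimal round-trip sketch of the encode/decode API introduced above: Encode/Decode for convenience, EncodeReader/DecodeFrom to avoid the intermediate copy. The module import path and the filter parameters are illustrative assumptions, not part of this change.

package main

import (
	"bytes"
	"fmt"
	"io"
	"log"

	cuckoo "github.com/linvon/cuckoo-filter" // assumed import path
)

func main() {
	// Illustrative parameters: 4 tags per bucket, 9 bits per item, ~100k keys.
	f := cuckoo.NewFilter(4, 9, 100000, cuckoo.TableTypeSingle)
	f.Add([]byte("hello"))

	// Stream the serialized filter instead of materializing it twice.
	r, size := f.EncodeReader()
	var buf bytes.Buffer
	buf.Grow(int(size))
	if _, err := io.Copy(&buf, r); err != nil {
		log.Fatal(err)
	}

	// DecodeFrom reuses buf's bytes as-is; Decode would copy them first.
	g, err := cuckoo.DecodeFrom(buf.Bytes())
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(g.Contain([]byte("hello"))) // true
}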
diff --git a/packedtable.go b/packedtable.go
index 74d5441..dd2ea34 100644
--- a/packedtable.go
+++ b/packedtable.go
@@ -6,13 +6,14 @@
 package cuckoo

 import (
+	"bytes"
 	"encoding/binary"
-	"errors"
 	"fmt"
+	"io"
 	"math/rand"
 )

-//PackedTable using Permutation encoding to save 1 bit per tag
+// PackedTable using Permutation encoding to save 1 bit per tag
 type PackedTable struct {
 	kDirBitsPerTag uint
 	kBitsPerBucket uint
@@ -26,7 +27,7 @@ type PackedTable struct {
 	perm PermEncoding
 }

-//NewPackedTable return a packedTable
+// NewPackedTable return a packedTable
 func NewPackedTable() *PackedTable {
 	return &PackedTable{}
 }
@@ -37,8 +38,8 @@ const (
 	codeSize = 12
 )

-//Init init table
-func (p *PackedTable) Init(_, bitsPerTag, num uint) {
+// Init init table
+func (p *PackedTable) Init(_, bitsPerTag, num uint, initialBucketsHint []byte) error {
 	p.bitsPerTag = bitsPerTag
 	p.numBuckets = num
@@ -48,31 +49,36 @@ func (p *PackedTable) Init(_, bitsPerTag, num uint) {
 	p.kDirBitsMask = ((1 << p.kDirBitsPerTag) - 1) << cFpSize
 	// NOTE: use 7 extra bytes to avoid overrun as we always read a uint64
 	p.len = (p.kBitsPerBucket*p.numBuckets+7)>>3 + 7
-	p.buckets = make([]byte, p.len)
+	buckets, err := getBucketsFromHint(initialBucketsHint, p.len)
+	if err != nil {
+		return err
+	}
+	p.buckets = buckets
 	p.perm.Init()
+	return nil
 }

-//NumBuckets return num of table buckets
+// NumBuckets return num of table buckets
 func (p *PackedTable) NumBuckets() uint {
 	return p.numBuckets
 }

-//SizeInTags return num of tags that table can store
+// SizeInTags return num of tags that table can store
 func (p *PackedTable) SizeInTags() uint {
 	return tagsPerPTable * p.numBuckets
 }

-//SizeInBytes return bytes occupancy of table
+// SizeInBytes return bytes occupancy of table
 func (p *PackedTable) SizeInBytes() uint {
 	return p.len
 }

-//BitsPerItem return bits occupancy per item of table
+// BitsPerItem return bits occupancy per item of table
 func (p *PackedTable) BitsPerItem() uint {
 	return p.bitsPerTag
 }

-//PrintBucket print a bucket
+// PrintBucket print a bucket
 func (p *PackedTable) PrintBucket(i uint) {
 	pos := p.kBitsPerBucket * i / bitsPerByte
 	fmt.Printf("\tbucketBits =%x\n", p.buckets[pos:pos+p.kBytesPerBucket])
@@ -81,7 +87,7 @@ func (p *PackedTable) PrintBucket(i uint) {
 	p.PrintTags(tags)
 }

-//PrintTags print tags
+// PrintTags print tags
 func (p *PackedTable) PrintTags(tags [tagsPerPTable]uint32) {
 	var lowBits [tagsPerPTable]uint8
 	var dirBits [tagsPerPTable]uint32
@@ -110,7 +116,7 @@ func (p *PackedTable) sortTags(tags *[tagsPerPTable]uint32) {
 	p.sortPair(&tags[1], &tags[2])
 }

-//ReadBucket read and decode the bucket i, pass the 4 decoded tags to the 2nd arg
+// ReadBucket read and decode the bucket i, pass the 4 decoded tags to the 2nd arg
 // bucket bits = 12 codeword bits + dir bits of tag1 + dir bits of tag2 ...
 func (p *PackedTable) ReadBucket(i uint, tags *[tagsPerPTable]uint32) {
 	var codeword uint16
@@ -216,7 +222,7 @@ func (p *PackedTable) readOutBytes(i, pos uint) (uint64, uint64, uint) {
 	return u1, u2, rShift
 }

-//WriteBucket write tags into bucket i
+// WriteBucket write tags into bucket i
 func (p *PackedTable) WriteBucket(i uint, tags [tagsPerPTable]uint32) {
 	p.sortTags(&tags)
@@ -235,12 +241,12 @@ func (p *PackedTable) WriteBucket(i uint, tags [tagsPerPTable]uint32) {
 	highBits[3] = tags[3] & 0xfffffff0

 	// note that : tags[j] = lowBits[j] | highBits[j]
-	var codeword = p.perm.Encode(lowBits)
+	codeword := p.perm.Encode(lowBits)
 	pos := i * p.kBitsPerBucket >> 3
 	switch p.kBitsPerBucket {
 	case 16:
 		// 1 dirBits per tag
-		var v = codeword | uint16(highBits[0]<<8) | uint16(highBits[1]<<9) |
+		v := codeword | uint16(highBits[0]<<8) | uint16(highBits[1]<<9) |
 			uint16(highBits[2]<<10) | uint16(highBits[3]<<11)
 		p.buckets[pos] = byte(v)
 		p.buckets[pos+1] = byte(v >> 8)
@@ -295,7 +301,7 @@ func (p *PackedTable) WriteBucket(i uint, tags [tagsPerPTable]uint32) {
 		p.buckets[pos+3] = byte(v >> 24)
 	case 32:
 		// 5 dirBits per tag
-		var v = uint32(codeword) | (highBits[0] << 8) | (highBits[1] << 13) |
+		v := uint32(codeword) | (highBits[0] << 8) | (highBits[1] << 13) |
 			(highBits[2] << 18) | (highBits[3] << 23)
 		p.buckets[pos] = byte(v)
 		p.buckets[pos+1] = byte(v >> 8)
@@ -320,7 +326,7 @@ func (p *PackedTable) WriteBucket(i uint, tags [tagsPerPTable]uint32) {
 		p.buckets[pos+7] = byte(v >> 56)
 	case 64:
 		// 13 dirBits per tag
-		var v = uint64(codeword) | uint64(highBits[0])<<8 |
+		v := uint64(codeword) | uint64(highBits[0])<<8 |
 			uint64(highBits[1])<<21 | uint64(highBits[2])<<34 |
 			uint64(highBits[3])<<47
 		p.buckets[pos] = byte(v)
@@ -334,7 +340,6 @@ func (p *PackedTable) WriteBucket(i uint, tags [tagsPerPTable]uint32) {
 	default:
 		p.writeInBytes(i, pos, codeword, highBits)
 	}
-
 }

 func (p *PackedTable) writeInBytes(i, pos uint, codeword uint16, highBits [tagsPerPTable]uint32) {
@@ -380,7 +385,7 @@ func (p *PackedTable) writeInBytes(i, pos uint, codeword uint16, highBits [tagsP
 	return
 }

-//FindTagInBuckets find if tag in bucket i1 i2
+// FindTagInBuckets find if tag in bucket i1 i2
 func (p *PackedTable) FindTagInBuckets(i1, i2 uint, tag uint32) bool {
 	var tags1, tags2 [tagsPerPTable]uint32
 	p.ReadBucket(i1, &tags1)
@@ -391,7 +396,7 @@ func (p *PackedTable) FindTagInBuckets(i1, i2 uint, tag uint32) bool {
 		(tags2[2] == tag) || (tags2[3] == tag)
 }

-//DeleteTagFromBucket delete tag from bucket i
+// DeleteTagFromBucket delete tag from bucket i
 func (p *PackedTable) DeleteTagFromBucket(i uint, tag uint32) bool {
 	var tags [tagsPerPTable]uint32
 	p.ReadBucket(i, &tags)
@@ -405,7 +410,7 @@ func (p *PackedTable) DeleteTagFromBucket(i uint, tag uint32) bool {
 	return false
 }

-//InsertTagToBucket insert tag into bucket i
+// InsertTagToBucket insert tag into bucket i
 func (p *PackedTable) InsertTagToBucket(i uint, tag uint32, kickOut bool, oldTag *uint32) bool {
 	var tags [tagsPerPTable]uint32
 	p.ReadBucket(i, &tags)
@@ -425,14 +430,14 @@ func (p *PackedTable) InsertTagToBucket(i uint, tag uint32, kickOut bool, oldTag
 	return false
 }

-//Reset reset table
+// Reset reset table
 func (p *PackedTable) Reset() {
 	for i := range p.buckets {
 		p.buckets[i] = 0
 	}
 }

-//Info return table's info
+// Info return table's info
 func (p *PackedTable) Info() string {
 	return fmt.Sprintf("PackedHashtable with tag size: %v bits \n"+
 		"\t\t4 packed bits(3 bits after compression) and %v direct bits\n"+
 		"\t\tAssociativity: 4 \n"+
 		"\t\tHas %v buckets, which amounts to %v slots in total \n",
@@ -442,26 +447,20 @@
 		p.bitsPerTag, p.kDirBitsPerTag, p.numBuckets, p.SizeInTags())
 }

+const packedTableMetadataSize = 2 + bytesPerUint32
+
 // Encode returns a byte slice representing a TableBucket
-func (p *PackedTable) Encode() []byte {
-	bytes := make([]byte, p.len+6)
-	bytes[0] = uint8(TableTypePacked)
-	bytes[1] = uint8(p.bitsPerTag)
-	b := make([]byte, bytesPerUint32)
-	binary.LittleEndian.PutUint32(b, uint32(p.numBuckets))
-	copy(bytes[2:], b)
-	copy(bytes[6:], p.buckets)
-	return bytes
+func (p *PackedTable) Reader() (io.Reader, uint) {
+	var metadata [packedTableMetadataSize]byte
+	metadata[0] = uint8(TableTypePacked)
+	metadata[1] = uint8(p.bitsPerTag)
+	binary.LittleEndian.PutUint32(metadata[2:], uint32(p.numBuckets))
+	return io.MultiReader(bytes.NewReader(metadata[:]), bytes.NewReader(p.buckets)), uint(len(metadata) + len(p.buckets))
 }

 // Decode parse a byte slice into a TableBucket
-func (p *PackedTable) Decode(bytes []byte) error {
-	bitsPerTag := uint(bytes[1])
-	numBuckets := uint(binary.LittleEndian.Uint32(bytes[2:6]))
-	p.Init(0, bitsPerTag, numBuckets)
-	if len(bytes) != int(p.len+6) {
-		return errors.New("unexpected bytes length")
-	}
-	copy(p.buckets, bytes[6:])
-	return nil
+func (p *PackedTable) Decode(b []byte) error {
+	bitsPerTag := uint(b[1])
+	numBuckets := uint(binary.LittleEndian.Uint32(b[2:]))
+	return p.Init(0, bitsPerTag, numBuckets, b[6:])
 }
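As a cross-check of the size constants above (filterMetadataSize = 13, packedTableMetadataSize = 6), this is the byte layout that Filter.EncodeReader produces when the table is a PackedTable, as read from this diff; the offsets are implied by the code rather than documented elsewhere.

// Serialized layout for a Filter backed by a PackedTable (derived from the diff):
//
//   offset  size  field
//   0       4     numItems      (uint32, little-endian)
//   4       4     victim.index  (uint32, little-endian)
//   8       4     victim.tag    (uint32, little-endian)
//   12      1     victim.used   (0 or 1)
//   13      1     table type    (TableTypePacked)
//   14      1     bitsPerTag
//   15      4     numBuckets    (uint32, little-endian)
//   19      ...   bucket bytes  (p.len bytes, including the 7-byte read-ahead pad)
//
// The uint returned next to the io.Reader is therefore 13 + 6 + p.len, which is
// what DecodeFrom unpacks via b[12], b[13] and table.Decode(b[13:]).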
diff --git a/singletable.go b/singletable.go
index 54bddac..b41c63c 100644
--- a/singletable.go
+++ b/singletable.go
@@ -6,13 +6,14 @@
 package cuckoo

 import (
+	"bytes"
 	"encoding/binary"
-	"errors"
 	"fmt"
+	"io"
 	"math/rand"
 )

-//SingleTable the most naive table implementation: one huge bit array
+// SingleTable the most naive table implementation: one huge bit array
 type SingleTable struct {
 	kTagsPerBucket uint
 	numBuckets uint
@@ -22,44 +23,48 @@ type SingleTable struct {
 	len uint
 }

-//NewSingleTable return a singleTable
+// NewSingleTable return a singleTable
 func NewSingleTable() *SingleTable {
 	return &SingleTable{}
 }

-//Init init table
-func (t *SingleTable) Init(tagsPerBucket, bitsPerTag, num uint) {
+// Init init table
+func (t *SingleTable) Init(tagsPerBucket, bitsPerTag, num uint, initialBucketsHint []byte) error {
 	t.bitsPerTag = bitsPerTag
 	t.numBuckets = num
 	t.kTagsPerBucket = tagsPerBucket
 	t.tagMask = (1 << bitsPerTag) - 1

 	t.len = (t.bitsPerTag*t.kTagsPerBucket*t.numBuckets + 7) >> 3
-
-	t.bucket = make([]byte, t.len)
+	buckets, err := getBucketsFromHint(initialBucketsHint, t.len)
+	if err != nil {
+		return err
+	}
+	t.bucket = buckets
+	return nil
 }

-//NumBuckets return num of table buckets
+// NumBuckets return num of table buckets
 func (t *SingleTable) NumBuckets() uint {
 	return t.numBuckets
 }

-//SizeInBytes return bytes occupancy of table
+// SizeInBytes return bytes occupancy of table
 func (t *SingleTable) SizeInBytes() uint {
 	return t.len
 }

-//SizeInTags return num of tags that table can store
+// SizeInTags return num of tags that table can store
 func (t *SingleTable) SizeInTags() uint {
 	return t.kTagsPerBucket * t.numBuckets
 }

-//BitsPerItem return bits occupancy per item of table
+// BitsPerItem return bits occupancy per item of table
 func (t *SingleTable) BitsPerItem() uint {
 	return t.bitsPerTag
 }

-//ReadTag read tag from bucket(i,j)
+// ReadTag read tag from bucket(i,j)
 func (t *SingleTable) ReadTag(i, j uint) uint32 {
 	pos := (i*t.bitsPerTag*t.kTagsPerBucket + t.bitsPerTag*j) / bitsPerByte
 	var tag uint32
@@ -96,10 +101,10 @@ func (t *SingleTable) readOutBytes(i, j, pos uint) uint32 {
 	return uint32(tmp)
 }

-//WriteTag write tag into bucket(i,j)
+// WriteTag write tag into bucket(i,j)
 func (t *SingleTable) WriteTag(i, j uint, n uint32) {
 	pos := (i*t.bitsPerTag*t.kTagsPerBucket + t.bitsPerTag*j) / bitsPerByte
-	var tag = n & t.tagMask
+	tag := n & t.tagMask
 	/* following code only works for little-endian */
 	switch t.bitsPerTag {
 	case 2:
@@ -161,10 +166,9 @@ func (t *SingleTable) writeInBytes(i, j, pos uint, tag uint32) {
 	for k := uint(0); k < kBytes; k++ {
 		t.bucket[pos+k] = byte(tmp >> (k * bitsPerByte))
 	}
-	return
 }

-//FindTagInBuckets find if tag in bucket i1 i2
+// FindTagInBuckets find if tag in bucket i1 i2
 func (t *SingleTable) FindTagInBuckets(i1, i2 uint, tag uint32) bool {
 	var j uint
 	for j = 0; j < t.kTagsPerBucket; j++ {
@@ -175,7 +179,7 @@ func (t *SingleTable) FindTagInBuckets(i1, i2 uint, tag uint32) bool {
 	return false
 }

-//DeleteTagFromBucket delete tag from bucket i
+// DeleteTagFromBucket delete tag from bucket i
 func (t *SingleTable) DeleteTagFromBucket(i uint, tag uint32) bool {
 	var j uint
 	for j = 0; j < t.kTagsPerBucket; j++ {
@@ -187,7 +191,7 @@ func (t *SingleTable) DeleteTagFromBucket(i uint, tag uint32) bool {
 	return false
 }

-//InsertTagToBucket insert tag into bucket i
+// InsertTagToBucket insert tag into bucket i
 func (t *SingleTable) InsertTagToBucket(i uint, tag uint32, kickOut bool, oldTag *uint32) bool {
 	var j uint
 	for j = 0; j < t.kTagsPerBucket; j++ {
@@ -204,14 +208,14 @@ func (t *SingleTable) InsertTagToBucket(i uint, tag uint32, kickOut bool, oldTag
 	return false
 }

-//Reset reset table
+// Reset reset table
 func (t *SingleTable) Reset() {
 	for i := range t.bucket {
 		t.bucket[i] = 0
 	}
 }

-//Info return table's info
+// Info return table's info
 func (t *SingleTable) Info() string {
 	return fmt.Sprintf("SingleHashtable with tag size: %v bits \n"+
 		"\t\tAssociativity: %v \n"+
@@ -220,28 +224,22 @@ func (t *SingleTable) Info() string {
 		t.bitsPerTag, t.kTagsPerBucket, t.numBuckets, t.SizeInTags())
 }

+const singleTableMetadataSize = 3 + bytesPerUint32
+
 // Encode returns a byte slice representing a TableBucket
-func (t *SingleTable) Encode() []byte {
-	bytes := make([]byte, t.len+7)
-	bytes[0] = uint8(TableTypeSingle)
-	bytes[1] = uint8(t.kTagsPerBucket)
-	bytes[2] = uint8(t.bitsPerTag)
-	b := make([]byte, bytesPerUint32)
-	binary.LittleEndian.PutUint32(b, uint32(t.numBuckets))
-	copy(bytes[3:], b)
-	copy(bytes[7:], t.bucket)
-	return bytes
+func (t *SingleTable) Reader() (io.Reader, uint) {
+	var metadata [singleTableMetadataSize]byte
+	metadata[0] = uint8(TableTypeSingle)
+	metadata[1] = uint8(t.kTagsPerBucket)
+	metadata[2] = uint8(t.bitsPerTag)
+	binary.LittleEndian.PutUint32(metadata[3:], uint32(t.numBuckets))
+	return io.MultiReader(bytes.NewReader(metadata[:]), bytes.NewReader(t.bucket)), uint(len(metadata) + len(t.bucket))
 }

 // Decode parse a byte slice into a TableBucket
-func (t *SingleTable) Decode(bytes []byte) error {
-	tagsPerBucket := uint(bytes[1])
-	bitsPerTag := uint(bytes[2])
-	numBuckets := uint(binary.LittleEndian.Uint32(bytes[3:7]))
-	t.Init(tagsPerBucket, bitsPerTag, numBuckets)
-	if len(bytes) != int(t.len+7) {
-		return errors.New("unexpected bytes length")
-	}
-	copy(t.bucket, bytes[7:])
-	return nil
+func (t *SingleTable) Decode(b []byte) error {
+	tagsPerBucket := uint(b[1])
+	bitsPerTag := uint(b[2])
+	numBuckets := uint(binary.LittleEndian.Uint32(b[3:]))
+	return t.Init(tagsPerBucket, bitsPerTag, numBuckets, b[7:])
 }
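One consequence of passing the decoded slice straight through as initialBucketsHint is that DecodeFrom aliases the caller's buffer: the restored table keeps b[7:] (or b[6:] for the packed table) as its live bucket storage, so later inserts write into that buffer, which is why Decode makes a copy first. A small sketch of the difference; the import path is an assumption:

package main

import (
	"fmt"

	cuckoo "github.com/linvon/cuckoo-filter" // assumed import path
)

func main() {
	f := cuckoo.NewFilter(4, 8, 1000, cuckoo.TableTypeSingle)
	raw, err := f.Encode()
	if err != nil {
		panic(err)
	}

	shared, _ := cuckoo.DecodeFrom(raw) // keeps raw's backing array as its buckets
	private, _ := cuckoo.Decode(raw)    // works on an independent copy of raw

	shared.Add([]byte("key")) // this write lands in raw's bytes as a side effect
	private.Add([]byte("other")) // raw stays untouched

	fmt.Println(shared.Contain([]byte("key")), private.Contain([]byte("other")))
}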
diff --git a/util.go b/util.go
index 5b87111..9777123 100644
--- a/util.go
+++ b/util.go
@@ -5,6 +5,8 @@
 package cuckoo

+import "fmt"
+
 const (
 	bitsPerByte = 8
 	bytesPerUint64 = 8
@@ -33,3 +35,14 @@ func maxLoadFactor(tagsPerBucket uint) float64 {
 		return 0.99
 	}
 }
+
+func getBucketsFromHint(initialBucketsHint []byte, expectedLength uint) ([]byte, error) {
+	result := initialBucketsHint
+	if len(result) == 0 {
+		result = make([]byte, expectedLength)
+	}
+	if uint(len(result)) != expectedLength {
+		return nil, fmt.Errorf("buckets length should be %d but got %d", expectedLength, len(result))
+	}
+	return result, nil
+}
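Related to getBucketsFromHint's length check, a short sketch of the failure mode it guards against: a buffer whose bucket section does not match the geometry encoded in the header is rejected during decode rather than silently mis-sized. The import path is an assumption.

package main

import (
	"fmt"

	cuckoo "github.com/linvon/cuckoo-filter" // assumed import path
)

func main() {
	f := cuckoo.NewFilter(4, 8, 1000, cuckoo.TableTypeSingle)
	raw, err := f.Encode()
	if err != nil {
		panic(err)
	}

	// Dropping the last byte makes the bucket slice one byte too short, so
	// table.Init reports a length mismatch and DecodeFrom returns an error.
	if _, err := cuckoo.DecodeFrom(raw[:len(raw)-1]); err != nil {
		fmt.Println("decode rejected:", err)
	}
}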