-
Notifications
You must be signed in to change notification settings - Fork 446
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds a bloom filter implementation to `@libp2p/utils` for use in libp2p components.
- Loading branch information
1 parent
998fcaf
commit e1923b0
Showing
3 changed files
with
350 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
// ported from xxbloom - https://github.com/ceejbot/xxbloom/blob/master/LICENSE | ||
import { randomBytes } from '@libp2p/crypto' | ||
import mur from 'murmurhash3js-revisited' | ||
import { Uint8ArrayList } from 'uint8arraylist' | ||
import { alloc } from 'uint8arrays/alloc' | ||
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' | ||
|
||
// ln(2) squared - the divisor `optimize` uses when deriving the bit count
const LN2_SQUARED = Math.LN2 * Math.LN2
|
||
/**
 * Settings accepted by the `BloomFilter` constructor. Every field is optional.
 */
export interface BloomFilterOptions {
  /**
   * Size of the filter in bits. The constructor falls back to 1024 when this
   * is omitted.
   */
  bits?: number

  /**
   * How many random seeds to generate when `seeds` is not supplied. The
   * constructor falls back to 8 when this is omitted.
   */
  hashes?: number

  /**
   * Explicit hash seeds, one hash is computed per seed. When present these are
   * used as-is and `hashes` is ignored.
   */
  seeds?: number[]
}
|
||
/**
 * A probabilistic set backed by a bit array. Each item is hashed once per seed
 * and the resulting bits are set; membership tests check that every one of
 * those bits is set.
 */
export class BloomFilter {
  /**
   * Create a `BloomFilter` with the smallest `bits` and `hashes` value for the
   * specified item count and error rate.
   *
   * @param itemcount - how many items the filter is expected to hold
   * @param errorRate - acceptable false-positive rate, defaults to 0.005
   */
  static create (itemcount: number, errorRate: number = 0.005): BloomFilter {
    const opts = optimize(itemcount, errorRate)
    return new BloomFilter(opts)
  }

  /** Seeds passed to the hash function - one hash is computed per seed */
  public readonly seeds: number[]
  /** Number of addressable bits in the filter */
  public readonly bits: number
  /** Backing storage, `Math.ceil(bits / 8)` bytes long */
  public buffer: Uint8Array

  /**
   * @param options - when `seeds` is given it is used as-is, otherwise
   * `hashes` (default 8) random seeds are generated; `bits` defaults to 1024
   */
  constructor (options: BloomFilterOptions = {}) {
    this.seeds = options.seeds ?? generateSeeds(options.hashes ?? 8)
    this.bits = options.bits ?? 1024
    this.buffer = alloc(Math.ceil(this.bits / 8))
  }

  /**
   * Add an item to the filter
   */
  add (item: Uint8Array | string): void {
    const bytes = typeof item === 'string' ? uint8ArrayFromString(item) : item

    for (const seed of this.seeds) {
      // map each hash onto a bit position inside the filter
      this.setbit(mur.x86.hash32(bytes, seed) % this.bits)
    }
  }

  /**
   * Test if the filter has an item. If it returns false it definitely does not
   * have the item. If it returns true, it probably has the item but there's
   * an `errorRate` chance it doesn't.
   */
  has (item: Uint8Array | string): boolean {
    const bytes = typeof item === 'string' ? uint8ArrayFromString(item) : item

    for (const seed of this.seeds) {
      // a single unset bit proves the item was never added
      if (!this.getbit(mur.x86.hash32(bytes, seed) % this.bits)) {
        return false
      }
    }

    return true
  }

  /**
   * Reset the filter
   */
  clear (): void {
    this.buffer.fill(0)
  }

  /**
   * Set a single bit. Bit `n` lives in byte `floor(n / 8)` at offset `n % 8`
   * counting from the least significant bit.
   *
   * The position is computed arithmetically so the cost does not grow with the
   * bit index. Indexes that fall outside the buffer are ignored.
   */
  setbit (bit: number): void {
    const pos = Math.floor(bit / 8)
    const mask = 0x1 << (bit % 8)

    this.buffer[pos] |= mask
  }

  /**
   * Read a single bit, using the same layout as `setbit`. Indexes that fall
   * outside the buffer read as unset.
   */
  getbit (bit: number): boolean {
    const pos = Math.floor(bit / 8)
    const mask = 0x1 << (bit % 8)

    return (this.buffer[pos] & mask) !== 0
  }
}
|
||
/**
 * Work out the bit count and number of hash functions for a filter expected to
 * hold `itemcount` items with a false-positive rate of `errorRate`.
 *
 * The item count, the bit count and the hash count are each clamped to a
 * minimum of 1. Without this a high error rate rounds the hash count down to
 * zero, and an item count of zero yields zero bits and a `NaN` hash count -
 * both produce a filter with no seeds that reports every item as present.
 *
 * Non-finite results caused by an invalid `errorRate` are not masked.
 */
function optimize (itemcount: number, errorRate: number = 0.005): { bits: number, hashes: number } {
  // size the filter for at least one item so the divisions below stay finite
  const items = Math.max(1, itemcount)
  const bits = Math.max(1, Math.round(-1 * items * Math.log(errorRate) / LN2_SQUARED))
  const hashes = Math.max(1, Math.round((bits / items) * Math.LN2))

  return { bits, hashes }
}
|
||
/**
 * Produce `count` distinct seeds, each read as a little-endian unsigned 32-bit
 * integer from four random bytes.
 */
function generateSeeds (count: number): number[] {
  const seeds: number[] = []

  while (seeds.length < count) {
    const candidate = new Uint8ArrayList(randomBytes(4)).getUint32(0, true)

    // Drawing the same value twice is unlikely but possible - when it happens
    // discard the candidate and draw again
    if (!seeds.includes(candidate)) {
      seeds.push(candidate)
    }
  }

  return seeds
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
// ported from xxbloom - https://github.com/ceejbot/xxbloom/blob/master/LICENSE | ||
import { expect } from 'aegir/chai' | ||
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' | ||
import { BloomFilter } from '../src/bloom-filter.js' | ||
|
||
/**
 * Returns 1 when at least one byte of `buffer` is non-zero, otherwise 0.
 */
function hasBitsSet (buffer: Uint8Array): number {
  return buffer.some(byte => byte !== 0) ? 1 : 0
}
|
||
// Behavioural tests for BloomFilter: construction, sizing via `create()`,
// low-level bit access, and the add/has/clear operations.
describe('bloom-filter', () => {
  it('constructs a filter of the requested size', () => {
    const filter = new BloomFilter({ hashes: 4, bits: 32 })
    expect(filter.seeds).to.have.lengthOf(4)
    expect(filter.bits).to.equal(32)
    expect(filter.buffer).to.be.an.instanceOf(Uint8Array)
  })

  it('zeroes out its storage buffer', () => {
    const filter = new BloomFilter({ hashes: 3, bits: 64 })
    for (let i = 0; i < filter.buffer.length; i++) {
      expect(filter.buffer[i]).to.equal(0)
    }
  })

  it('uses passed-in seeds if provided', () => {
    const filter = new BloomFilter({ bits: 256, seeds: [1, 2, 3, 4, 5] })
    expect(filter.seeds.length).to.equal(5)
    expect(filter.seeds[0]).to.equal(1)
    expect(filter.seeds[4]).to.equal(5)
  })

  describe('create()', () => {
    it('creates a filter with good defaults', () => {
      let filter = BloomFilter.create(95)
      expect(filter.bits).to.equal(1048)
      expect(filter.seeds.length).to.equal(8)

      filter = BloomFilter.create(148)
      expect(filter.bits).to.equal(1632)
      expect(filter.seeds.length).to.equal(8)

      filter = BloomFilter.create(10)
      expect(filter.bits).to.equal(110)
      expect(filter.seeds.length).to.equal(8)
    })

    it('lets you specify an error rate', () => {
      let filter = BloomFilter.create(20000)
      expect(filter.bits).to.equal(220555)
      const previous = filter.bits

      // a looser error rate needs fewer bits for the same item count
      filter = BloomFilter.create(20000, 0.2)
      expect(filter.bits).to.be.below(previous)
    })
  })

  describe('setbit() and getbit()', () => {
    it('sets the specified bit', () => {
      const filter = new BloomFilter({ hashes: 3, bits: 16 })

      filter.setbit(0)
      let val = filter.getbit(0)
      expect(val).to.equal(true)

      filter.setbit(1)
      val = filter.getbit(1)
      expect(val).to.equal(true)

      val = filter.getbit(2)
      expect(val).to.equal(false)

      filter.setbit(10)
      val = filter.getbit(10)
      expect(val).to.equal(true)
    })

    it('can set all bits', () => {
      const filter = new BloomFilter({ hashes: 3, bits: 16 })
      expect(filter.buffer.length).to.equal(2)

      for (let i = 0; i < 16; i++) {
        filter.setbit(i)
      }

      for (let i = 0; i < 2; i++) {
        expect(filter.buffer[i]).to.equal(255)
      }
    })

    it('slides over into the next buffer slice when setting bits', () => {
      const filter = new BloomFilter({ hashes: 3, bits: 64 })

      // bit 8 is the lowest bit of the second byte
      filter.setbit(8)
      expect(filter.buffer[1]).to.equal(1)

      filter.setbit(17)
      expect(filter.buffer[2]).to.equal(2)

      filter.setbit(34)
      expect(filter.buffer[4]).to.equal(4)
    })
  })

  describe('add()', () => {
    it('can store buffers', () => {
      const filter = new BloomFilter({ hashes: 4, bits: 128 })

      expect(hasBitsSet(filter.buffer)).to.equal(0)
      filter.add(uint8ArrayFromString('cat'))
      expect(hasBitsSet(filter.buffer)).to.equal(1)
    })

    it('can store strings', () => {
      const filter = new BloomFilter({ hashes: 4, bits: 128 })
      filter.add('cat')

      expect(hasBitsSet(filter.buffer)).to.equal(1)
    })

    it('can add a hundred random items', () => {
      const alpha = '0123456789abcdefghijklmnopqrstuvwxyz'
      function randomWord (length?: number): string {
        length = length ?? Math.ceil(Math.random() * 20)
        let result = ''
        for (let i = 0; i < length; i++) {
          result += alpha[Math.floor(Math.random() * alpha.length)]
        }

        return result
      }

      const filter = BloomFilter.create(100)
      const words: string[] = []

      for (let i = 0; i < 100; i++) {
        const w = randomWord()
        words.push(w)
        filter.add(w)
      }

      // added items must always be reported as present
      for (let i = 0; i < words.length; i++) {
        expect(filter.has(words[i])).to.equal(true)
      }
    })
  })

  describe('has()', () => {
    it('returns true when called on a stored item', () => {
      const filter = new BloomFilter({ hashes: 3, bits: 16 })
      filter.add('cat')

      expect(hasBitsSet(filter.buffer)).to.equal(1)
      expect(filter.has('cat')).to.be.true()
    })

    // NOTE(review): seeds are random here, so this negative check can fail on
    // a rare false positive - consider passing fixed `seeds`
    it('returns false for items not in the set (mostly)', () => {
      const filter = new BloomFilter({ hashes: 4, bits: 50 })
      filter.add('cat')
      expect(filter.has('dog')).to.be.false()
    })

    // NOTE(review): the final negative check is probabilistic for the same
    // reason as the test above
    it('responds appropriately for arrays of added items', () => {
      const filter = new BloomFilter({ hashes: 3, bits: 128 })
      filter.add('cat')
      filter.add('dog')
      filter.add('wallaby')

      expect(filter.has('cat')).to.equal(true)
      expect(filter.has('dog')).to.equal(true)
      expect(filter.has('wallaby')).to.equal(true)
      expect(filter.has('orange')).to.equal(false)
    })
  })

  describe('clear()', () => {
    it('clears the filter', () => {
      const filter = new BloomFilter({ hashes: 3, bits: 128 })
      filter.add('cat')
      filter.add('dog')
      filter.add('wallaby')
      expect(hasBitsSet(filter.buffer)).to.equal(1)

      filter.clear()
      expect(hasBitsSet(filter.buffer)).to.equal(0)
    })
  })
})