diff --git a/package-lock.json b/package-lock.json index 493091b3..e90ae921 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,7 +19,6 @@ "@open-draft/deferred-promise": "^2.2.0", "@tabcat/zzzync": "^5.0.0", "datastore-core": "^9.2.7", - "fission-bloom-filters": "^1.7.1", "helia": "^3.0.0", "interface-blockstore": "^5.2.9", "it-all": "^3.0.4", @@ -34,7 +33,8 @@ "streaming-iterables": "^8.0.1", "uint8arrays": "^5.0.1", "w3name": "^1.0.8", - "web3.storage": "^4.5.5" + "web3.storage": "^4.5.5", + "xxhashjs": "^0.2.2" }, "devDependencies": { "@chainsafe/libp2p-gossipsub": "^11.0.1", @@ -53,6 +53,7 @@ "@libp2p/websockets": "^8.0.10", "@multiformats/multiaddr": "^12.1.12", "@types/mocha": "^10.0.6", + "@types/xxhashjs": "^0.2.4", "aegir": "^42.1.0", "blockstore-level": "^1.1.7", "copy-deps": "^1.1.2", @@ -8011,6 +8012,15 @@ "@types/node": "*" } }, + "node_modules/@types/xxhashjs": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/@types/xxhashjs/-/xxhashjs-0.2.4.tgz", + "integrity": "sha512-E2+ZoJY2JjmVPN0iQM5gJvZkk98O2PYXSi6HrciEk3EKF34+mauEk/HgwTeCz+2r8HXHMKpucrwy4qTT12OPaQ==", + "dev": true, + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/yargs": { "version": "17.0.32", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz", @@ -14107,21 +14117,6 @@ "node": ">= 10.13.0" } }, - "node_modules/fission-bloom-filters": { - "version": "1.7.1", - "resolved": "https://registry.npmjs.org/fission-bloom-filters/-/fission-bloom-filters-1.7.1.tgz", - "integrity": "sha512-AAVWxwqgSDK+/3Tn2kx+a9j/ND/pyVNVZgn/rL5pfQaX7w0qfP81PlLCNKhM4XKOhcg1kFXNcoWkQKg3MyyULw==", - "dependencies": { - "buffer": "^6.0.3", - "is-buffer": "^2.0.4", - "lodash": "^4.17.15", - "lodash.eq": "^4.0.0", - "lodash.indexof": "^4.0.5", - "reflect-metadata": "^0.1.13", - "seedrandom": "^3.0.5", - "xxhashjs": "^0.2.2" - } - }, "node_modules/flat": { "version": "5.0.2", "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", @@ -16241,6 +16236,7 @@ "version": "2.0.5", "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-2.0.5.tgz", "integrity": "sha512-i2R6zNFDwgEHJyQUtJEk0XFi1i0dPFn/oqjK3/vPCcDeJvW5NQ83V8QbicfF1SupOaB0h8ntgBC2YiE7dfyctQ==", + "dev": true, "funding": [ { "type": "github", @@ -18688,7 +18684,8 @@ "node_modules/lodash": { "version": "4.17.21", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", + "dev": true }, "node_modules/lodash-es": { "version": "4.17.21", @@ -18707,11 +18704,6 @@ "resolved": "https://registry.npmjs.org/lodash.debounce/-/lodash.debounce-4.0.8.tgz", "integrity": "sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow==" }, - "node_modules/lodash.eq": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/lodash.eq/-/lodash.eq-4.0.0.tgz", - "integrity": "sha512-vbrJpXL6kQNG6TkInxX12DZRfuYVllSxhwYqjYB78g2zF3UI15nFO/0AgmZnZRnaQ38sZtjCiVjGr2rnKt4v0g==" - }, "node_modules/lodash.escaperegexp": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/lodash.escaperegexp/-/lodash.escaperegexp-4.1.2.tgz", @@ -18724,11 +18716,6 @@ "integrity": "sha512-uHaJFihxmJcEX3kT4I23ABqKKalJ/zDrDg0lsFtc1h+3uw49SIJ5beyhx5ExVRti3AvKoOJngIj7xz3oylPdWQ==", "dev": true }, - "node_modules/lodash.indexof": { - "version": "4.0.5", - "resolved": "https://registry.npmjs.org/lodash.indexof/-/lodash.indexof-4.0.5.tgz", - "integrity": "sha512-t9wLWMQsawdVmf6/IcAgVGqAJkNzYVcn4BHYZKTPW//l7N5Oq7Bq138BaVk19agcsPZePcidSgTTw4NqS1nUAw==" - }, "node_modules/lodash.ismatch": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/lodash.ismatch/-/lodash.ismatch-4.4.0.tgz", @@ -29663,11 +29650,6 @@ "esprima": "~4.0.0" } }, - "node_modules/reflect-metadata": { - "version": "0.1.14", - "resolved": "https://registry.npmjs.org/reflect-metadata/-/reflect-metadata-0.1.14.tgz", - "integrity": "sha512-ZhYeb6nRaXCfhnndflDK8qI6ZQ/YcWZCISRAWICW9XYqMUwjZM9Z0DveWX/ABN01oxSHwVxKQmxeYZSsm0jh5A==" - }, "node_modules/regenerate": { "version": "1.4.2", "resolved": "https://registry.npmjs.org/regenerate/-/regenerate-1.4.2.tgz", @@ -30193,11 +30175,6 @@ "loose-envify": "^1.1.0" } }, - "node_modules/seedrandom": { - "version": "3.0.5", - "resolved": "https://registry.npmjs.org/seedrandom/-/seedrandom-3.0.5.tgz", - "integrity": "sha512-8OwmbklUNzwezjGInmZ+2clQmExQPvomqjL7LFqOYqtmuxRgQYqOD3mHaU+MvZn5FLUeVxVfQjwLZW/n/JFuqg==" - }, "node_modules/semantic-release": { "name": "@achingbrain/semantic-release", "version": "21.0.9", diff --git a/package.json b/package.json index e9f32f8f..88883d10 100644 --- a/package.json +++ b/package.json @@ -89,6 +89,7 @@ "@libp2p/websockets": "^8.0.10", "@multiformats/multiaddr": "^12.1.12", "@types/mocha": "^10.0.6", + "@types/xxhashjs": "^0.2.4", "aegir": "^42.1.0", "blockstore-level": "^1.1.7", "copy-deps": "^1.1.2", @@ -123,7 +124,6 @@ "@open-draft/deferred-promise": "^2.2.0", "@tabcat/zzzync": "^5.0.0", "datastore-core": "^9.2.7", - "fission-bloom-filters": "^1.7.1", "helia": "^3.0.0", "interface-blockstore": "^5.2.9", "it-all": "^3.0.4", @@ -138,7 +138,8 @@ "streaming-iterables": "^8.0.1", "uint8arrays": "^5.0.1", "w3name": "^1.0.8", - "web3.storage": "^4.5.5" + "web3.storage": "^4.5.5", + "xxhashjs": "^0.2.2" }, "overrides": { "@alanshaw/pail": { diff --git a/src/utils/bloom-filter.ts b/src/utils/bloom-filter.ts new file mode 100644 index 00000000..d962d3f8 --- /dev/null +++ b/src/utils/bloom-filter.ts @@ -0,0 +1,104 @@ +/** + * This is a slimmed down Bloom Filter based of: + * https://github.com/Callidon/bloom-filters + * https://github.com/fission-codes/bloom-filters + */ +import XXH from 'xxhashjs' + +const uint8ToBits = (uint8: number): number[] => [128, 64, 32, 16, 8, 4, 2, 1].map( + x => (x & uint8) > 0 ? 1 : 0 +) + +const bitsToUint8 = (bits: number[]): number => bits.reduce( + (acc, cur, i) => cur === 0 ? acc : acc + Math.pow(2, 7 - i), + 0 +) + +const uint8ArrayToBuffer = (a: Uint8Array): ArrayBuffer => a.buffer.slice(a.byteOffset, a.byteLength + a.byteOffset) + +const hashTwice = (value: Uint8Array, seed: number): [number, number] => [ + XXH.h64(uint8ArrayToBuffer(value), seed + 1).toNumber(), + XXH.h64(uint8ArrayToBuffer(value), seed + 2).toNumber() +] + +const getDistinctIndices = (element: Uint8Array, size: number, number: number, seed: number): number[] => { + const indexes = new Set() + let n = 0 + let hashes = hashTwice(element, seed) + + while (indexes.size < number) { + const ind = hashes[0] % size + if (!indexes.has(ind)) { + indexes.add(ind) + } + + hashes[0] = (hashes[0] + hashes[1]) % size + hashes[1] = (hashes[1] + n) % size + n++ + + if (n > size) { + seed++ + hashes = hashTwice(element, seed) + } + } + + return [...indexes.values()] +} + +export default class BloomFilter { + public seed: number + private readonly _size: number + private readonly _nbHashes: number + private _filter: number[] + + constructor (size: number, nbHashes: number, seed: number = 0x1111111111) { + if (nbHashes < 1) { + throw new Error('A Bloom Filter must have at least 2 hash functions.') + } + + this.seed = seed + this._size = size + this._nbHashes = nbHashes + this._filter = new Array(this._size).fill(0) + } + + static fromBytes (bytes: Uint8Array, nbHashes: number): BloomFilter { + const bits = bytes.reduce((a, c) => a.concat(uint8ToBits(c)), [] as number[]) + const filter = new BloomFilter(bits.length, nbHashes) + + filter._filter = bits + + return filter + } + + add (element: Uint8Array): void { + const indexes = getDistinctIndices(element, this._size, this._nbHashes, this.seed) + + for (let i = 0; i < indexes.length; i++) { + this._filter[indexes[i]] = 1 + } + } + + has (element: Uint8Array): boolean { + const indexes = getDistinctIndices(element, this._size, this._nbHashes, this.seed) + + for (let i = 0; i < indexes.length; i++) { + if (this._filter[indexes[i]] == null || this._filter[indexes[i]] === 0) { + return false + } + } + + return true + } + + toBytes (): Uint8Array { + const arr = new Uint8Array(Math.ceil(this._size / 8)) + + for (let i = 0; i < arr.length; i++) { + const bits = this._filter.slice(i * 8, i * 8 + 8) + arr[i] = bitsToUint8(bits) + } + + return arr + } +} diff --git a/src/utils/heads-exchange.ts b/src/utils/heads-exchange.ts index e73d32d5..46fce8ca 100644 --- a/src/utils/heads-exchange.ts +++ b/src/utils/heads-exchange.ts @@ -1,18 +1,16 @@ import { DeferredPromise } from '@open-draft/deferred-promise' -import { BloomFilter } from 'fission-bloom-filters' import * as lp from 'it-length-prefixed' import { pipe } from 'it-pipe' import { type Pushable, pushable } from 'it-pushable' import { CID } from 'multiformats/cid' import { consume } from 'streaming-iterables' +import BloomFilter from './bloom-filter.js' import type { Stream } from '@libp2p/interface/connection' import type { PeerId } from '@libp2p/interface/peer-id' import type { Uint8ArrayList } from 'uint8arraylist' import { Message } from '@/message/heads.js' import { hashHeads } from '@/utils/replicator.js' -const uint8ArrayToBuffer = (a: Uint8Array): ArrayBuffer => a.buffer.slice(a.byteOffset, a.byteLength + a.byteOffset) - const calculateFilterParams = (length: number, rate: number): { size: number, hashes: number } => { const safeLength = length <= 0 ? 1 : length const size = Math.ceil(-((safeLength * Math.log(rate)) / Math.pow(Math.log(2), 2))) @@ -23,14 +21,10 @@ const calculateFilterParams = (length: number, rate: number): { size: number, ha const createFilter = (heads: CID[], options: Partial<{ collisionRate: number, seed: number }> = {}): { filter: BloomFilter, hashes: number } => { const { size, hashes } = calculateFilterParams(heads.length, options.collisionRate ?? 0.1) - const filter = new BloomFilter(size, hashes) - - if (options.seed != null) { - filter.seed = options.seed - } + const filter = new BloomFilter(size, hashes, options.seed) for (const head of heads) { - filter.add(uint8ArrayToBuffer(head.bytes)) + filter.add(head.bytes) } return { filter, hashes } @@ -223,7 +217,7 @@ export class HeadsExchange { filter.seed = message.filter.seed ?? this.remoteSeed - const missing = this.heads.map(h => h.bytes).filter(b => !filter.has(uint8ArrayToBuffer(b))) + const missing = this.heads.map(h => h.bytes).filter(b => !filter.has(b)) return { heads: missing } } diff --git a/test/test-bloom-filter.ts b/test/test-bloom-filter.ts new file mode 100644 index 00000000..e2e49692 --- /dev/null +++ b/test/test-bloom-filter.ts @@ -0,0 +1,88 @@ +import { assert } from 'aegir/chai' +import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' +import BloomFilter from '../src/utils/bloom-filter.js' + +const testData = [ + uint8ArrayFromString('test-1'), + uint8ArrayFromString('test-2'), + uint8ArrayFromString('test-3'), + uint8ArrayFromString('abc123'), + uint8ArrayFromString('A very long uint8array..........'), + uint8ArrayFromString(''), + uint8ArrayFromString('1'), + uint8ArrayFromString('a'), + uint8ArrayFromString('b'), + uint8ArrayFromString('c') +] + +describe('bloom filter', () => { + it('creates a filter with the specified seed', () => { + const seed = 0x123456789 + const filter = new BloomFilter(2, 2, seed) + + assert.equal(filter.seed, seed) + }) + + it('the has method returns false on an empty filter', () => { + const filter = new BloomFilter(2, 2) + + for (const data of testData) { + assert.isFalse(filter.has(data)) + } + }) + + it('the has method returns true if it has that element', () => { + const filter = new BloomFilter(20, 4) + + for (const data of testData) { + filter.add(data) + } + + for (const data of testData) { + assert.isTrue(filter.has(data)) + } + }) + + it('the has method returns true only on elements that are contained in a partial filter', () => { + const filter = new BloomFilter(20, 4) + + for (let i = 0; i < testData.length / 2; i++) { + filter.add(testData[i]) + } + + for (let i = 0; i < testData.length; i++) { + if (i < testData.length / 2) { + assert.isTrue(filter.has(testData[i])) + } else { + assert.isFalse(filter.has(testData[i])) + } + } + }) + + it('encodes the filter', () => { + const filter = new BloomFilter(20, 4) + + for (const data of testData) { + filter.add(data) + } + + const f = filter.toBytes() + + assert.isOk(f) + }) + + it('decodes the filter', () => { + const nbHashes = 4 + const filter = new BloomFilter(20, nbHashes) + + for (const data of testData) { + filter.add(data) + } + + const f = filter.toBytes() + + const filter2 = BloomFilter.fromBytes(f, nbHashes) + + assert.deepEqual(filter2.toBytes(), filter.toBytes()) + }) +})