From dc1cd9b13e1de9c1aa56ce0fbea331684505abce Mon Sep 17 00:00:00 2001 From: JokingHero Date: Mon, 25 Dec 2023 21:42:42 +0100 Subject: [PATCH] update manifest and allow building BFF with super low number of items --- Manifest.toml | 6 ++++-- Project.toml | 2 +- README.md | 2 +- src/binaryfusefilter.jl | 6 +++--- test/runtests.jl | 10 ++++++++-- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index f878761..764b70e 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,8 +1,10 @@ # This file is machine-generated - editing it directly is not advised -[[Random]] +manifest_format = "2.0" + +[[deps.Random]] deps = ["Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -[[Serialization]] +[[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" diff --git a/Project.toml b/Project.toml index fea81f9..29e9975 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FastFilter" uuid = "5470531f-076d-47a9-aacb-e15e1e7f44ac" authors = ["JokingHero "] -version = "0.1.0" +version = "0.1.1" [deps] Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/README.md b/README.md index 7e62203..2fb4d8d 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ they are never wrong when item was actually in the initial set. What is more, space usage and speed of access is excellent for these data structures. This Julia package is based of [binaryfusefilter.h](https://github.com/FastFilter/xor_singleheader/blob/master/include/binaryfusefilter.h). -Currently only binary fuse filters are re-implemented, but we support UInt8, UInt16 and UInt32 fingerprints. +Currently, only binary fuse filters are re-implemented, but we support UInt8, UInt16 and UInt32 fingerprints. Feel free to make a PR with other implementations. diff --git a/src/binaryfusefilter.jl b/src/binaryfusefilter.jl index acacb2e..1a963f3 100644 --- a/src/binaryfusefilter.jl +++ b/src/binaryfusefilter.jl @@ -25,9 +25,9 @@ end function calculate_size_factor(arity::UInt32, n_keys::UInt32) if arity == 3 - return Float64(max(1.125, 0.875 + 0.25 * log(1000000)/log(Float64(n_keys)))) + return Float64(max(1.125, 0.875 + 0.25 * log(1000000)/log(Float64(n_keys + 1)))) elseif arity == 4 - return Float64(max(1.075, 0.77 + 0.305 * log(600000)/log(Float64(n_keys)))) + return Float64(max(1.075, 0.77 + 0.305 * log(600000)/log(Float64(n_keys + 1)))) else return Float64(2.0) end @@ -143,7 +143,7 @@ function BinaryFuseFilter{T}( end startPos = zeros(UInt32, block) - for i in 1:length(startPos) + for i in eachindex(startPos) # important: we do not want i * n_keys to overflow!!! # in java we have (int) ((long) i * size / block) startPos[i] = UInt32(((UInt64(i - 1) * UInt64(n_keys)) >> blockBits)) diff --git a/test/runtests.jl b/test/runtests.jl index fc9dfa5..65382c8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,11 +3,17 @@ using FastFilter using Random utypes = [UInt8, UInt16, UInt32] -error_rates = [0.005, 1.5e-5, 2.33e-10] +error_rates = [0.005, 1.7e-5, 2.33e-10] rng = MersenneTwister(42) -for i in 1:length(utypes) +for i in eachindex(utypes) @testset "binaryfusefilter.jl " begin + + @testset "Can be build with stupid small number of items" begin + filter = BinaryFuseFilter{utypes[i]}([UInt64(1)]) + @test UInt64(1) in filter + end + n = Int(230e6) items = rand(rng, UInt64, n) filter = BinaryFuseFilter{utypes[i]}(items)