From b85dfd650247105c73acbc562d965f6321008458 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 19 Aug 2024 09:24:12 +0000 Subject: [PATCH 1/2] Use msbIdx for direct lookup, and then additional check if prev is large enough. --- vespalib/src/tests/stllike/hashtable_test.cpp | 27 +++++++++++++----- .../src/vespa/vespalib/stllike/hashtable.cpp | 28 ++++++++----------- .../src/vespa/vespalib/stllike/hashtable.h | 8 ++---- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/vespalib/src/tests/stllike/hashtable_test.cpp b/vespalib/src/tests/stllike/hashtable_test.cpp index 4880c7a872d7..4cd5172a4dc9 100644 --- a/vespalib/src/tests/stllike/hashtable_test.cpp +++ b/vespalib/src/tests/stllike/hashtable_test.cpp @@ -6,8 +6,6 @@ #include #include #include -#include -#include using vespalib::hashtable; using std::vector; @@ -29,7 +27,7 @@ template using up_hashtable = TEST("require that hashtable can store unique_ptrs") { up_hashtable table(100); using UP = std::unique_ptr; - table.insert(UP(new int(42))); + table.insert(std::make_unique(42)); auto it = table.find(42); EXPECT_EQUAL(42, **it); @@ -43,12 +41,12 @@ TEST("require that hashtable can store unique_ptrs") { template using Entry = std::pair>; typedef hashtable, - vespalib::hash, std::equal_to, + vespalib::hash, std::equal_to<>, Select1st>> PairHashtable; TEST("require that hashtable can store pairs of ") { PairHashtable table(100); - table.insert(make_pair(42, std::unique_ptr(new int(84)))); + table.insert(make_pair(42, std::make_unique(84))); PairHashtable::iterator it = table.find(42); EXPECT_EQUAL(84, *it->second); auto it2 = table.find(42); @@ -69,9 +67,24 @@ TEST("require that hashtable can be copied") { EXPECT_EQUAL(42, *table2.find(42)); } +TEST("require that getModuloStl always return a larger number in 32 bit integer range") { + for (size_t i=0; i < 32; i++) { + size_t num = 1ul << i; + size_t prime = hashtable_base::getModuloStl(num); + EXPECT_GREATER_EQUAL(prime, num); + printf("%lu <= %lu\n", num, prime); + } + for (size_t i=0; i < 32; i++) { + size_t num = (1ul << i) - 1; + size_t prime = hashtable_base::getModuloStl(num); + EXPECT_GREATER_EQUAL(prime, num); + printf("%lu <= %lu\n", num, prime); + } +} + TEST("require that you can insert duplicates") { using Pair = std::pair; - using Map = hashtable, std::equal_to, Select1st>; + using Map = hashtable, std::equal_to<>, Select1st>; Map m(1); EXPECT_EQUAL(0u, m.size()); @@ -126,7 +139,7 @@ struct FirstInVector { TEST("require that hashtable> can be copied") { typedef hashtable, vespalib::hash, - std::equal_to, FirstInVector>> VectorHashtable; + std::equal_to<>, FirstInVector>> VectorHashtable; VectorHashtable table(100); table.insert(std::vector{2, 4, 6}); VectorHashtable table2(table); diff --git a/vespalib/src/vespa/vespalib/stllike/hashtable.cpp b/vespalib/src/vespa/vespalib/stllike/hashtable.cpp index 5cc85f728502..397d1fafbd0d 100644 --- a/vespalib/src/vespa/vespalib/stllike/hashtable.cpp +++ b/vespalib/src/vespa/vespalib/stllike/hashtable.cpp @@ -4,33 +4,27 @@ namespace { -static const unsigned long __stl_prime_list[] = +constexpr unsigned long STL_PRIME_LIST[] = { - 7ul, 17ul, 53ul, 97ul, 193ul, - 389ul, 769ul, 1543ul, 3079ul, 6151ul, - 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, - 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, - 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, - 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul + 7ul, 7ul, 7ul, 17ul, 53ul, 97ul, 193ul, 389ul, + 769ul, 1543ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, + 196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, + 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul }; } namespace vespalib { -size_t -hashtable_base::getModulo(size_t newSize, const unsigned long * list, size_t sz) noexcept -{ - const unsigned long* first = list; - const unsigned long* last = list + sz; - const unsigned long* pos = std::lower_bound(first, last, newSize); - return (pos == last) ? *(last - 1) : *pos; -} - size_t hashtable_base::getModuloStl(size_t size) noexcept { - return getModulo(size, __stl_prime_list, sizeof(__stl_prime_list)/sizeof(__stl_prime_list[0])); + if (size > 0xfffffffful) return 0xfffffffful; + if (size < 8) return 7ul; + uint32_t index = Optimized::msbIdx(size); + return (size <= STL_PRIME_LIST[index - 1]) + ? STL_PRIME_LIST[index - 1] + : STL_PRIME_LIST[index]; } } diff --git a/vespalib/src/vespa/vespalib/stllike/hashtable.h b/vespalib/src/vespa/vespalib/stllike/hashtable.h index c56d90a89f8c..57e629446110 100644 --- a/vespalib/src/vespa/vespalib/stllike/hashtable.h +++ b/vespalib/src/vespa/vespalib/stllike/hashtable.h @@ -76,9 +76,9 @@ class hashtable_base class and_modulator { public: - explicit and_modulator(next_t sizeOfHashTable) noexcept : _mask(sizeOfHashTable-1) { } - next_t modulo(next_t hash) const noexcept { return hash & _mask; } - next_t getTableSize() const noexcept { return _mask + 1; } + constexpr explicit and_modulator(next_t sizeOfHashTable) noexcept : _mask(sizeOfHashTable-1) { } + constexpr next_t modulo(next_t hash) const noexcept { return hash & _mask; } + constexpr next_t getTableSize() const noexcept { return _mask + 1; } static next_t selectHashTableSize(size_t sz) noexcept { return hashtable_base::getModuloSimple(sz); } private: next_t _mask; @@ -95,8 +95,6 @@ class hashtable_base (void) to; } }; -private: - static size_t getModulo(size_t newSize, const unsigned long * list, size_t sz) noexcept; }; template From 1aec5bca080c7a46cb85f9841200e795e97a1f50 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 19 Aug 2024 10:29:44 +0000 Subject: [PATCH 2/2] Also test the prime itself, and prime+1 --- vespalib/src/tests/stllike/hashtable_test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vespalib/src/tests/stllike/hashtable_test.cpp b/vespalib/src/tests/stllike/hashtable_test.cpp index 4cd5172a4dc9..1ee8b36e9ed0 100644 --- a/vespalib/src/tests/stllike/hashtable_test.cpp +++ b/vespalib/src/tests/stllike/hashtable_test.cpp @@ -72,6 +72,8 @@ TEST("require that getModuloStl always return a larger number in 32 bit integer size_t num = 1ul << i; size_t prime = hashtable_base::getModuloStl(num); EXPECT_GREATER_EQUAL(prime, num); + EXPECT_EQUAL(prime, hashtable_base::getModuloStl(prime)); + EXPECT_GREATER(hashtable_base::getModuloStl(prime+1), prime + 1); printf("%lu <= %lu\n", num, prime); } for (size_t i=0; i < 32; i++) {