Skip to content

Commit

Permalink
Merge pull request #32168 from vespa-engine/balder/use-msbIdx-for-dir…
Browse files Browse the repository at this point in the history
…ect-lookup

Use msbIdx for direct lookup, and then additional check if prev is la…
  • Loading branch information
baldersheim authored Aug 19, 2024
2 parents 62c1280 + 1aec5bc commit cad7770
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 29 deletions.
29 changes: 22 additions & 7 deletions vespalib/src/tests/stllike/hashtable_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
#include <vespa/vespalib/stllike/identity.h>
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
#include <memory>
#include <vector>

using vespalib::hashtable;
using std::vector;
Expand All @@ -29,7 +27,7 @@ template<typename K> using up_hashtable =
TEST("require that hashtable can store unique_ptrs") {
up_hashtable<int> table(100);
using UP = std::unique_ptr<int>;
table.insert(UP(new int(42)));
table.insert(std::make_unique<int>(42));
auto it = table.find(42);
EXPECT_EQUAL(42, **it);

Expand All @@ -43,12 +41,12 @@ TEST("require that hashtable can store unique_ptrs") {
template <typename K, typename V> using Entry =
std::pair<K, std::unique_ptr<V>>;
typedef hashtable<int, Entry<int, int>,
vespalib::hash<int>, std::equal_to<int>,
vespalib::hash<int>, std::equal_to<>,
Select1st<Entry<int, int>>> PairHashtable;

TEST("require that hashtable can store pairs of <key, unique_ptr to value>") {
PairHashtable table(100);
table.insert(make_pair(42, std::unique_ptr<int>(new int(84))));
table.insert(make_pair(42, std::make_unique<int>(84)));
PairHashtable::iterator it = table.find(42);
EXPECT_EQUAL(84, *it->second);
auto it2 = table.find(42);
Expand All @@ -69,9 +67,26 @@ TEST("require that hashtable<int> can be copied") {
EXPECT_EQUAL(42, *table2.find(42));
}

TEST("require that getModuloStl always return a larger number in 32 bit integer range") {
for (size_t i=0; i < 32; i++) {
size_t num = 1ul << i;
size_t prime = hashtable_base::getModuloStl(num);
EXPECT_GREATER_EQUAL(prime, num);
EXPECT_EQUAL(prime, hashtable_base::getModuloStl(prime));
EXPECT_GREATER(hashtable_base::getModuloStl(prime+1), prime + 1);
printf("%lu <= %lu\n", num, prime);
}
for (size_t i=0; i < 32; i++) {
size_t num = (1ul << i) - 1;
size_t prime = hashtable_base::getModuloStl(num);
EXPECT_GREATER_EQUAL(prime, num);
printf("%lu <= %lu\n", num, prime);
}
}

TEST("require that you can insert duplicates") {
using Pair = std::pair<int, vespalib::string>;
using Map = hashtable<int, Pair, vespalib::hash<int>, std::equal_to<int>, Select1st<Pair>>;
using Map = hashtable<int, Pair, vespalib::hash<int>, std::equal_to<>, Select1st<Pair>>;

Map m(1);
EXPECT_EQUAL(0u, m.size());
Expand Down Expand Up @@ -126,7 +141,7 @@ struct FirstInVector {

TEST("require that hashtable<vector<int>> can be copied") {
typedef hashtable<int, vector<int>, vespalib::hash<int>,
std::equal_to<int>, FirstInVector<int, vector<int>>> VectorHashtable;
std::equal_to<>, FirstInVector<int, vector<int>>> VectorHashtable;
VectorHashtable table(100);
table.insert(std::vector<int>{2, 4, 6});
VectorHashtable table2(table);
Expand Down
28 changes: 11 additions & 17 deletions vespalib/src/vespa/vespalib/stllike/hashtable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,27 @@

namespace {

static const unsigned long __stl_prime_list[] =
constexpr unsigned long STL_PRIME_LIST[] =
{
7ul, 17ul, 53ul, 97ul, 193ul,
389ul, 769ul, 1543ul, 3079ul, 6151ul,
12289ul, 24593ul, 49157ul, 98317ul, 196613ul,
393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul,
12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul,
402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul
7ul, 7ul, 7ul, 17ul, 53ul, 97ul, 193ul, 389ul,
769ul, 1543ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul,
196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul
};

}

namespace vespalib {

size_t
hashtable_base::getModulo(size_t newSize, const unsigned long * list, size_t sz) noexcept
{
const unsigned long* first = list;
const unsigned long* last = list + sz;
const unsigned long* pos = std::lower_bound(first, last, newSize);
return (pos == last) ? *(last - 1) : *pos;
}

size_t
hashtable_base::getModuloStl(size_t size) noexcept
{
return getModulo(size, __stl_prime_list, sizeof(__stl_prime_list)/sizeof(__stl_prime_list[0]));
if (size > 0xfffffffful) return 0xfffffffful;
if (size < 8) return 7ul;
uint32_t index = Optimized::msbIdx(size);
return (size <= STL_PRIME_LIST[index - 1])
? STL_PRIME_LIST[index - 1]
: STL_PRIME_LIST[index];
}

}
8 changes: 3 additions & 5 deletions vespalib/src/vespa/vespalib/stllike/hashtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ class hashtable_base
class and_modulator
{
public:
explicit and_modulator(next_t sizeOfHashTable) noexcept : _mask(sizeOfHashTable-1) { }
next_t modulo(next_t hash) const noexcept { return hash & _mask; }
next_t getTableSize() const noexcept { return _mask + 1; }
constexpr explicit and_modulator(next_t sizeOfHashTable) noexcept : _mask(sizeOfHashTable-1) { }
constexpr next_t modulo(next_t hash) const noexcept { return hash & _mask; }
constexpr next_t getTableSize() const noexcept { return _mask + 1; }
static next_t selectHashTableSize(size_t sz) noexcept { return hashtable_base::getModuloSimple(sz); }
private:
next_t _mask;
Expand All @@ -95,8 +95,6 @@ class hashtable_base
(void) to;
}
};
private:
static size_t getModulo(size_t newSize, const unsigned long * list, size_t sz) noexcept;
};

template<typename V>
Expand Down

0 comments on commit cad7770

Please sign in to comment.