
Commit

revert prefetch for StringHashTable
Signed-off-by: guo-shaoge <[email protected]>
guo-shaoge committed Dec 17, 2024
1 parent 83fb879 commit 20d56e4
Showing 11 changed files with 17 additions and 792 deletions.
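
Most of the diff below is mechanical: ArenaKeyHolder goes back to holding an Arena & instead of an Arena *, so call sites pass the arena by reference (arena / *pool) and the persist path no longer dereferences a pointer. Here is a minimal, self-contained sketch of that restored shape, using simplified stand-in types rather than the real DB::Arena and DB::StringRef.

// Sketch only: Arena and StringRef are toy stand-ins for the real classes.
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

struct Arena // stand-in for DB::Arena
{
    std::vector<std::string> storage;
    const char * insert(const char * data, size_t size)
    {
        storage.emplace_back(data, size);
        return storage.back().data();
    }
};

struct StringRef // stand-in for DB::StringRef
{
    const char * data = nullptr;
    size_t size = 0;
};

struct ArenaKeyHolder
{
    StringRef key{};
    Arena & pool; // reference member, as restored by this commit
};

// Copy the key bytes into the arena so the hash table can keep the StringRef
// alive after the source column goes away.
inline void keyHolderPersistKey(ArenaKeyHolder & holder)
{
    assert(holder.key.size > 0);
    holder.key.data = holder.pool.insert(holder.key.data, holder.key.size);
}

int main()
{
    Arena arena;
    ArenaKeyHolder holder{StringRef{"abc", 3}, arena}; // a caller holding Arena * pool passes *pool
    keyHolderPersistKey(holder);
    return 0;
}
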
@@ -182,7 +182,7 @@ class AggregateFunctionGroupUniqArrayGeneric
{
// We have to copy the keys to our arena.
assert(arena != nullptr);
cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), arena}, it, inserted);
cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), *arena}, it, inserted);
}
}

2 changes: 1 addition & 1 deletion dbms/src/AggregateFunctions/KeyHolderHelpers.h
@@ -24,7 +24,7 @@ inline auto getKeyHolder(const IColumn & column, size_t row_num, Arena & arena)
{
if constexpr (is_plain_column)
{
return ArenaKeyHolder{column.getDataAt(row_num), &arena};
return ArenaKeyHolder{column.getDataAt(row_num), arena};
}
else
{
41 changes: 6 additions & 35 deletions dbms/src/Common/ColumnsHashing.h
@@ -124,32 +124,17 @@ struct HashMethodString
[[maybe_unused]] Arena * pool,
[[maybe_unused]] std::vector<String> & sort_key_containers) const
{
auto key = getKey(row);
auto last_offset = row == 0 ? 0 : offsets[row - 1];
// Remove last zero byte.
StringRef key(chars + last_offset, offsets[row] - last_offset - 1);
if (likely(collator))
key = collator->sortKey(key.data, key.size, sort_key_containers[0]);

return ArenaKeyHolder{key, pool};
}

ALWAYS_INLINE inline ArenaKeyHolder getKeyHolder(ssize_t row, Arena * pool, Arena * sort_key_pool) const
{
auto key = getKey(row);
if (likely(collator))
key = collator->sortKey(key.data, key.size, *sort_key_pool);

return ArenaKeyHolder{key, pool};
return ArenaKeyHolder{key, *pool};
}

protected:
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;

private:
ALWAYS_INLINE inline StringRef getKey(size_t row) const
{
auto last_offset = row == 0 ? 0 : offsets[row - 1];
// Remove last zero byte.
return StringRef(chars + last_offset, offsets[row] - last_offset - 1);
}
};

template <typename Value, typename Mapped, bool padding>
@@ -175,16 +160,11 @@ struct HashMethodStringBin
}

ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, std::vector<String> &) const
{
return getKeyHolder(row, pool, nullptr);
}

ALWAYS_INLINE inline auto getKeyHolder(ssize_t row, Arena * pool, Arena *) const
{
auto last_offset = row == 0 ? 0 : offsets[row - 1];
StringRef key(chars + last_offset, offsets[row] - last_offset - 1);
key = BinCollatorSortKey<padding>(key.data, key.size);
return ArenaKeyHolder{key, pool};
return ArenaKeyHolder{key, *pool};
}

protected:
@@ -433,16 +413,7 @@ struct HashMethodFixedString
if (collator)
key = collator->sortKeyFastPath(key.data, key.size, sort_key_containers[0]);

return ArenaKeyHolder{key, pool};
}

ALWAYS_INLINE inline ArenaKeyHolder getKeyHolder(size_t row, Arena * pool, Arena * sort_key_pool) const
{
StringRef key(&(*chars)[row * n], n);
if (collator)
key = collator->sortKeyFastPath(key.data, key.size, *sort_key_pool);

return ArenaKeyHolder{key, pool};
return ArenaKeyHolder{key, *pool};
}

protected:
38 changes: 0 additions & 38 deletions dbms/src/Common/ColumnsHashingImpl.h
@@ -204,44 +204,6 @@ class HashMethodBase
}
}

template <size_t SubMapIndex, bool enable_prefetch = false, typename Data, typename StringKeyType>
ALWAYS_INLINE inline EmplaceResult emplaceStringKey(
Data & data,
size_t idx,
std::vector<StringKeyType> & datas,
const std::vector<size_t> & hashvals)
{
// For spill, hashvals.size() will be le to total_rows.
// Because only remaining rows that didn't insert into HashMap will be handled here.
assert(hashvals.size() <= static_cast<Derived &>(*this).total_rows);

auto & submap = StringHashTableSubMapSelector<SubMapIndex, Data::is_two_level, std::decay_t<Data>>::getSubMap(
hashvals[idx],
data);
if constexpr (enable_prefetch)
prefetch(submap, idx, hashvals);

return emplaceImpl(datas[idx], submap, hashvals[idx]);
}

template <size_t SubMapIndex, bool enable_prefetch = false, typename Data, typename StringKeyType>
ALWAYS_INLINE inline FindResult findStringKey(
Data & data,
size_t idx,
std::vector<StringKeyType> & datas,
const std::vector<size_t> & hashvals)
{
assert(hashvals.size() <= static_cast<Derived &>(*this).total_rows);

auto & submap = StringHashTableSubMapSelector<SubMapIndex, Data::is_two_level, std::decay_t<Data>>::getSubMap(
hashvals[idx],
data);
if constexpr (enable_prefetch)
prefetch(submap, idx, hashvals);

return findKeyImpl(keyHolderGetKey(datas[idx]), submap, hashvals[idx]);
}

template <typename Data>
ALWAYS_INLINE inline size_t getHash(
const Data & data,
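
For context, the emplaceStringKey and findStringKey helpers deleted above implemented the batched path of the prefetch feature: the caller precomputed one hash per row, the hash selected a sub-map via StringHashTableSubMapSelector, and that sub-map was prefetched a few rows ahead of the row currently being emplaced so the cache miss overlaps with useful work. The sketch below shows the same pattern against a toy open-addressing table; FlatTable, batchEmplace, and PREFETCH_DISTANCE are hypothetical names, not the TiFlash API.

// Sketch only: a toy flat hash table plus a batched, prefetching insert loop.
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

struct Slot
{
    bool occupied = false;
    std::string key;
    uint64_t value = 0;
};

struct FlatTable // toy open-addressing table: power-of-two size, assumed never full
{
    std::vector<Slot> slots;
    explicit FlatTable(size_t size_log2) : slots(size_t{1} << size_log2) {}

    size_t slotIndex(size_t hashval) const { return hashval & (slots.size() - 1); }

    void emplace(const std::string & key, uint64_t value, size_t hashval)
    {
        size_t idx = slotIndex(hashval);
        while (slots[idx].occupied && slots[idx].key != key)
            idx = (idx + 1) & (slots.size() - 1); // linear probing
        slots[idx] = Slot{true, key, value};
    }
};

constexpr size_t PREFETCH_DISTANCE = 16; // assumed distance; the real value is a tuning knob

void batchEmplace(FlatTable & table, const std::vector<std::string> & keys, const std::vector<size_t> & hashvals)
{
    for (size_t i = 0; i < keys.size(); ++i)
    {
        if (i + PREFETCH_DISTANCE < keys.size())
        {
            // Issue a read prefetch for the slot a future row will probe first.
            const Slot * future = &table.slots[table.slotIndex(hashvals[i + PREFETCH_DISTANCE])];
            __builtin_prefetch(future, 0 /* read */, 1 /* low temporal locality */);
        }
        table.emplace(keys[i], i, hashvals[i]);
    }
}

The revert drops this batched path; string keys go back through the regular getKeyHolder / emplace flow without precomputed hash vectors.
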
6 changes: 3 additions & 3 deletions dbms/src/Common/HashTable/HashTableKeyHolder.h
@@ -92,7 +92,7 @@ namespace DB
struct ArenaKeyHolder
{
StringRef key{};
Arena * pool = nullptr;
Arena & pool;
};

} // namespace DB
@@ -111,14 +111,14 @@ inline void ALWAYS_INLINE keyHolderPersistKey(DB::ArenaKeyHolder & holder)
{
// Hash table shouldn't ask us to persist a zero key
assert(holder.key.size > 0);
holder.key.data = holder.pool->insert(holder.key.data, holder.key.size);
holder.key.data = holder.pool.insert(holder.key.data, holder.key.size);
}

inline void ALWAYS_INLINE keyHolderPersistKey(DB::ArenaKeyHolder && holder)
{
// Hash table shouldn't ask us to persist a zero key
assert(holder.key.size > 0);
holder.key.data = holder.pool->insert(holder.key.data, holder.key.size);
holder.key.data = holder.pool.insert(holder.key.data, holder.key.size);
}

inline void ALWAYS_INLINE keyHolderDiscardKey(DB::ArenaKeyHolder &) {}
143 changes: 0 additions & 143 deletions dbms/src/Common/HashTable/StringHashTable.h
@@ -16,9 +16,7 @@

#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTable.h>
#include <Common/Logger.h>
#include <IO/Endian.h>
#include <common/logger_useful.h>

#include <new>
#include <variant>
@@ -194,99 +192,6 @@ struct StringHashTableLookupResult
friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr; }
};

template <typename KeyHolder, typename Func0, typename Func8, typename Func16, typename Func24, typename FuncStr>
static auto
#if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER)
NO_INLINE NO_SANITIZE_ADDRESS NO_SANITIZE_THREAD
#else
ALWAYS_INLINE
#endif
dispatchStringHashTable(
size_t row,
KeyHolder && key_holder,
Func0 && func0,
Func8 && func8,
Func16 && func16,
Func24 && func24,
FuncStr && func_str)
{
const StringRef & x = keyHolderGetKey(key_holder);
const size_t sz = x.size;
if (sz == 0)
{
return func0(x, row);
}

if (x.data[sz - 1] == 0)
{
// Strings with trailing zeros are not representable as fixed-size
// string keys. Put them to the generic table.
return func_str(key_holder, row);
}

const char * p = x.data;
// pending bits that needs to be shifted out
const char s = (-sz & 7) * 8;
union
{
StringKey8 k8;
StringKey16 k16;
StringKey24 k24;
UInt64 n[3];
};
switch ((sz - 1) >> 3)
{
case 0: // 1..8 bytes
{
// first half page
if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
{
memcpy(&n[0], p, 8);
if constexpr (DB::isLittleEndian())
n[0] &= (-1ULL >> s);
else
n[0] &= (-1ULL << s);
}
else
{
const char * lp = x.data + x.size - 8;
memcpy(&n[0], lp, 8);
if constexpr (DB::isLittleEndian())
n[0] >>= s;
else
n[0] <<= s;
}
return func8(k8, row);
}
case 1: // 9..16 bytes
{
memcpy(&n[0], p, 8);
const char * lp = x.data + x.size - 8;
memcpy(&n[1], lp, 8);
if constexpr (DB::isLittleEndian())
n[1] >>= s;
else
n[1] <<= s;
return func16(k16, row);
}
case 2: // 17..24 bytes
{
memcpy(&n[0], p, 16);
const char * lp = x.data + x.size - 8;
memcpy(&n[2], lp, 8);
if constexpr (DB::isLittleEndian())
n[2] >>= s;
else
n[2] <<= s;
return func24(k24, row);
}
default: // >= 25 bytes
{
return func_str(key_holder, row);
}
}
}

template <typename SubMaps>
class StringHashTable : private boost::noncopyable
{
@@ -307,8 +212,6 @@

template <typename, typename, size_t>
friend class TwoLevelStringHashTable;
template <size_t, bool, typename>
friend struct StringHashTableSubMapSelector;

T0 m0;
T1 m1;
@@ -565,49 +468,3 @@
ms.clearAndShrink();
}
};

template <size_t SubMapIndex, bool is_two_level, typename Data>
struct StringHashTableSubMapSelector;

template <typename Data>
struct StringHashTableSubMapSelector<0, false, Data>
{
struct Hash
{
static ALWAYS_INLINE size_t operator()(const StringRef &) { return 0; }
};

static typename Data::T0 & getSubMap(size_t, Data & data) { return data.m0; }
};

template <typename Data>
struct StringHashTableSubMapSelector<1, false, Data>
{
using Hash = StringHashTableHash;

static typename Data::T1 & getSubMap(size_t, Data & data) { return data.m1; }
};

template <typename Data>
struct StringHashTableSubMapSelector<2, false, Data>
{
using Hash = StringHashTableHash;

static typename Data::T2 & getSubMap(size_t, Data & data) { return data.m2; }
};

template <typename Data>
struct StringHashTableSubMapSelector<3, false, Data>
{
using Hash = StringHashTableHash;

static typename Data::T3 & getSubMap(size_t, Data & data) { return data.m3; }
};

template <typename Data>
struct StringHashTableSubMapSelector<4, false, Data>
{
using Hash = StringHashTableHash;

static typename Data::Ts & getSubMap(size_t, Data & data) { return data.ms; }
};
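
For reference, the removed dispatchStringHashTable free function routed each key by length: the empty key to a dedicated slot, keys of 1-8 / 9-16 / 17-24 bytes packed into the fixed-size StringKey8 / StringKey16 / StringKey24 sub-tables, and everything else, including keys ending in a zero byte (which cannot be represented as padded fixed-size keys), to the generic table. A minimal sketch of just that routing decision, with assumed names (SubMap, chooseSubMap):

// Sketch only: length-based routing as in the removed dispatch code.
#include <cstddef>
#include <string_view>

enum class SubMap { M0, M8, M16, M24, Generic };

SubMap chooseSubMap(std::string_view key)
{
    const size_t sz = key.size();
    if (sz == 0)
        return SubMap::M0; // the empty key has its own slot
    if (key[sz - 1] == '\0')
        return SubMap::Generic; // a trailing zero byte is not representable as a padded fixed key
    switch ((sz - 1) >> 3)
    {
    case 0:  return SubMap::M8;      // 1..8 bytes  -> StringKey8
    case 1:  return SubMap::M16;     // 9..16 bytes -> StringKey16
    case 2:  return SubMap::M24;     // 17..24 bytes -> StringKey24
    default: return SubMap::Generic; // >= 25 bytes -> generic string table
    }
}
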
63 changes: 0 additions & 63 deletions dbms/src/Common/HashTable/TwoLevelStringHashTable.h
@@ -277,66 +277,3 @@ class TwoLevelStringHashTable
return res;
}
};

template <typename Data>
struct StringHashTableSubMapSelector<0, true, Data>
{
struct Hash
{
static ALWAYS_INLINE size_t operator()(const StringRef &) { return 0; }
};

static typename Data::Impl::T0 & getSubMap(size_t hashval, Data & data)
{
const auto bucket = Data::getBucketFromHash(hashval);
return data.impls[bucket].m0;
}
};

template <typename Data>
struct StringHashTableSubMapSelector<1, true, Data>
{
using Hash = StringHashTableHash;

static typename Data::Impl::T1 & getSubMap(size_t hashval, Data & data)
{
const auto bucket = Data::getBucketFromHash(hashval);
return data.impls[bucket].m1;
}
};

template <typename Data>
struct StringHashTableSubMapSelector<2, true, Data>
{
using Hash = StringHashTableHash;

static typename Data::Impl::T2 & getSubMap(size_t hashval, Data & data)
{
const auto bucket = Data::getBucketFromHash(hashval);
return data.impls[bucket].m2;
}
};

template <typename Data>
struct StringHashTableSubMapSelector<3, true, Data>
{
using Hash = StringHashTableHash;

static typename Data::Impl::T3 & getSubMap(size_t hashval, Data & data)
{
const auto bucket = Data::getBucketFromHash(hashval);
return data.impls[bucket].m3;
}
};

template <typename Data>
struct StringHashTableSubMapSelector<4, true, Data>
{
using Hash = StringHashTableHash;

static typename Data::Impl::Ts & getSubMap(size_t hashval, Data & data)
{
const auto bucket = Data::getBucketFromHash(hashval);
return data.impls[bucket].ms;
}
};