Replace xxh64 with a simplified version
This function gives the same result as the regular xxh64(), but it works
only on single 64-bit values.

Mostly just the "bit mixing" part of xxh64 (the finalize()/avalanche()
functions) is kept.

This performs a little better than calling the original function on an
8-byte slice because the compiler does not fully inline everything there
(finalize() remains a function call).

This speeds up index generation by a couple of percent.
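
As a sanity check, the claimed equivalence can be verified against the reference implementation with a small test program (a sketch, not part of this commit; the file name and test values are made up, and it assumes the xxHash headers are available). Note that XXH64() reads its input as little-endian bytes, so the two functions agree on little-endian hosts:

    // check_xxh64.cpp (hypothetical test, not part of this commit)
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    #define XXH_INLINE_ALL   // header-only mode, no need to link libxxhash
    #include <xxhash.h>      // reference XXH64()
    #include "hash.hpp"      // reduced xxh64() from this commit

    int main() {
        for (uint64_t v : {0ULL, 1ULL, 0xDEADBEEFULL, ~0ULL}) {
            // XXH64 over the 8 raw bytes with seed 0 must match the value-based version
            assert(xxh64(v) == XXH64(&v, sizeof(v), 0));
        }
        std::puts("ok");
    }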
marcelm committed Sep 21, 2023
1 parent 6f377e2 commit b7519c9
Showing 2 changed files with 123 additions and 5 deletions.
120 changes: 120 additions & 0 deletions src/hash.hpp
@@ -0,0 +1,120 @@
#ifndef STROBEALIGN_HASH_HPP
#define STROBEALIGN_HASH_HPP

/* This is an extremely reduced version of xxh64 that can only hash single
 * 64-bit values. It consists mostly of the "bit mixing" part of xxh64
 * (finalize()/avalanche()).
 *
 * This performs a little better than calling the original function on an
 * 8-byte slice because the compiler does not fully inline everything.
 */


/*
* xxHash - Extremely Fast Hash algorithm
* Header File
* Copyright (C) 2012-2021 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/


#include <stdint.h>


#ifdef __has_builtin
# define XXH_HAS_BUILTIN(x) __has_builtin(x)
#else
# define XXH_HAS_BUILTIN(x) 0
#endif

/*!
* @internal
* @def XXH_rotl32(x,r)
* @brief 32-bit rotate left.
*
* @param x The 32-bit integer to be rotated.
* @param r The number of bits to rotate.
* @pre
* @p r > 0 && @p r < 32
* @note
* @p x and @p r may be evaluated multiple times.
* @return The rotated result.
*/
#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
&& XXH_HAS_BUILTIN(__builtin_rotateleft64)
# define XXH_rotl32 __builtin_rotateleft32
# define XXH_rotl64 __builtin_rotateleft64
/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
#elif defined(_MSC_VER)
# define XXH_rotl32(x,r) _rotl(x,r)
# define XXH_rotl64(x,r) _rotl64(x,r)
#else
# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif
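
/* Worked example (editor's note): XXH_rotl64(0x0123456789ABCDEFULL, 8)
 * == 0x23456789ABCDEF01ULL -- the top byte wraps around to the bottom. */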



/******* xxh64 *******/
/*!
* @}
* @defgroup XXH64_impl XXH64 implementation
* @ingroup impl
*
* Details on the XXH64 implementation.
* @{
*/
/* #define rather than static const, so they can be used as initializers */
#define XXH_PRIME64_1 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
#define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
#define XXH_PRIME64_3 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
#define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
#define XXH_PRIME64_5 0x27D4EB2F165667C5ULL /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */


/// xxh64, but it can only be used for a single uint64_t value
static inline uint64_t xxh64(uint64_t input) {
    // Accumulator as in XXH64 with seed 0: seed + XXH_PRIME64_5 + length (8 bytes)
    uint64_t result = XXH_PRIME64_5 + 8;
    // The 8-byte step of finalize(): one round mixing the input into the accumulator
    input *= XXH_PRIME64_2;
    input = XXH_rotl64(input, 31);
    result ^= input * XXH_PRIME64_1;
    result = XXH_rotl64(result, 27);
    result = result * XXH_PRIME64_1 + XXH_PRIME64_4;
    // avalanche(): final mixing so every input bit can affect every output bit
    result ^= result >> 33;
    result = result * XXH_PRIME64_2;
    result ^= result >> 29;
    result = result * XXH_PRIME64_3;
    result ^= result >> 32;
    return result;
}

#endif
8 changes: 3 additions & 5 deletions src/randstrobes.cpp
@@ -4,9 +4,7 @@
 #include <algorithm>
 #include <cassert>
 
-#define XXH_INLINE_ALL
-#include <xxhash.h>
-
+#include "hash.hpp"
 #include "randstrobes.hpp"
 
 // a, A -> 0
@@ -37,14 +35,14 @@ static inline syncmer_hash_t syncmer_kmer_hash(uint64_t packed) {
     // return yk;
     // return hash64(yk, mask);
     // return sahlin_dna_hash(yk, mask);
-    return XXH64(&packed, sizeof(uint64_t), 0);
+    return xxh64(packed);
 }
 
 static inline syncmer_hash_t syncmer_smer_hash(uint64_t packed) {
     // return ys;
     // return robin_hash(ys);
     // return hash64(ys, mask);
-    return XXH64(&packed, 8, 0);
+    return xxh64(packed);
 }
 
 static inline randstrobe_hash_t randstrobe_hash(syncmer_hash_t hash1, syncmer_hash_t hash2) {
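
The inlining effect behind the speedup can be checked with a microbenchmark along these lines (a rough sketch, not part of this commit; the file name is made up, and absolute timings as well as the couple-of-percent figure will vary with compiler, flags, and machine):

    // bench_xxh64.cpp (hypothetical microbenchmark, not part of this commit)
    #include <chrono>
    #include <cstdint>
    #include <cstdio>

    #define XXH_INLINE_ALL
    #include <xxhash.h>
    #include "hash.hpp"

    int main() {
        constexpr uint64_t N = 100000000;
        uint64_t acc = 0;  // consume results so the loops are not optimized away

        auto t0 = std::chrono::steady_clock::now();
        for (uint64_t i = 0; i < N; i++) acc ^= XXH64(&i, sizeof(i), 0);  // original, via pointer
        auto t1 = std::chrono::steady_clock::now();
        for (uint64_t i = 0; i < N; i++) acc ^= xxh64(i);                 // reduced, by value
        auto t2 = std::chrono::steady_clock::now();

        using std::chrono::duration_cast;
        using std::chrono::milliseconds;
        std::printf("XXH64: %lld ms  xxh64: %lld ms  (acc=%llu)\n",
                    (long long)duration_cast<milliseconds>(t1 - t0).count(),
                    (long long)duration_cast<milliseconds>(t2 - t1).count(),
                    (unsigned long long)acc);
    }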
