diff --git a/src/hash.hpp b/src/hash.hpp new file mode 100644 index 00000000..c1995e8a --- /dev/null +++ b/src/hash.hpp @@ -0,0 +1,120 @@ +#ifndef STROBEALIGN_HASH_HPP +#define STROBEALIGN_HASH_HPP + +/* This is an extremely reduced version of xxh64 that can only hash single + * 64 bit values, consisting mostly of the "bit mixing" part of xxh64 + * (finalize()/avalanche()). + * + * This performs a little bit better than calling the original function on an + * 8-byte slice because the compiler does not fully inline everything. + */ + + +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (C) 2012-2021 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + + +#include +/* NOTE(review): the #include directive above carries no header name in this patch text; it presumably named the header that declares uint64_t (e.g. <cstdint>) - confirm against the original patch. */ + +#ifdef __has_builtin +# define XXH_HAS_BUILTIN(x) __has_builtin(x) +#else +# define XXH_HAS_BUILTIN(x) 0 +#endif + +/*! + * @internal + * @def XXH_rotl32(x,r) + * @brief 32-bit rotate left; XXH_rotl64(x,r), defined alongside it, is the 64-bit counterpart. + * + * @param x The integer to be rotated (32-bit for XXH_rotl32, 64-bit for XXH_rotl64). + * @param r The number of bits to rotate. + * @pre + * @p r > 0 && @p r < 32 (@p r < 64 for XXH_rotl64); the portable fallback shifts right by (width - r), so r == 0 would shift by the full width of @p x. + * @note + * In the portable fallback macros @p x and @p r are each evaluated twice. + * @return The rotated result. + */ +#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ + && XXH_HAS_BUILTIN(__builtin_rotateleft64) +# define XXH_rotl32 __builtin_rotateleft32 +# define XXH_rotl64 __builtin_rotateleft64 +/* Note: although _rotl exists for MinGW (GCC under Windows), performance seems poor */ +#elif defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) +#endif + + + +/******* xxh64 *******/ +/*! + * @} + * @defgroup XXH64_impl XXH64 implementation + * @ingroup impl + * + * Details on the XXH64 implementation. 
+ * @{ + +*/ +/* #define rather than static const, to be used as initializers */ +#define XXH_PRIME64_1 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */ +#define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */ +#define XXH_PRIME64_3 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */ +#define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */ +#define XXH_PRIME64_5 0x27D4EB2F165667C5ULL /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */ + + +/// Hash a single 64-bit value with the reduced xxh64 bit mixing; used at the call sites in place of XXH64(&value, 8, 0). @param input the value to hash. @return the 64-bit hash. Declared static inline because it is defined in a header: every translation unit that includes it gets its own copy instead of a duplicate external symbol. +static inline uint64_t xxh64(uint64_t input) { + uint64_t result = XXH_PRIME64_5 + 8; /* start state: XXH_PRIME64_5 plus 8, the input length in bytes */ + input *= XXH_PRIME64_2; + input = XXH_rotl64(input, 31); + result ^= input * XXH_PRIME64_1; /* fold the one mixed 8-byte lane into the state */ + result = XXH_rotl64(result, 27); + result = result * XXH_PRIME64_1 + XXH_PRIME64_4; + result ^= result >> 33; /* final avalanche: alternating xor-shift and multiply */ + result = result * XXH_PRIME64_2; + result ^= result >> 29; + result = result * XXH_PRIME64_3; + result ^= result >> 32; + return result; +} + +#endif diff --git a/src/randstrobes.cpp b/src/randstrobes.cpp index 8c87af37..3cfb5a0a 100644 --- a/src/randstrobes.cpp +++ b/src/randstrobes.cpp @@ -4,9 +4,7 @@ #include #include -#define XXH_INLINE_ALL -#include - +#include "hash.hpp" #include "randstrobes.hpp" // a, A -> 0 @@ -37,14 +35,14 @@ static inline syncmer_hash_t syncmer_kmer_hash(uint64_t packed) { // return yk; // return hash64(yk, mask); // return sahlin_dna_hash(yk, mask); - return XXH64(&packed, sizeof(uint64_t), 0); + return xxh64(packed); } static inline syncmer_hash_t syncmer_smer_hash(uint64_t packed) { // return ys; // return robin_hash(ys); // return hash64(ys, mask); - return XXH64(&packed, 8, 0); + return xxh64(packed); } static inline randstrobe_hash_t randstrobe_hash(syncmer_hash_t hash1, syncmer_hash_t hash2) {