From 4febaa19aeac295c2e3a59422a1d0971516f4fd4 Mon Sep 17 00:00:00 2001 From: Caleb Spare Date: Thu, 4 Apr 2024 11:16:23 -0700 Subject: [PATCH] Add initial support for custom seeds This adds support for custom seeds when using a Digest (but not to the one-shot functions Sum64 and Sum64String). The seed is not stored in the digest itself -- every Reset uses a zero seed and (Un)MarshalBinary is unchanged. This is simpler for backward compatibility but may be something to reconsider if we rework the API for a v3. --- xxhash.go | 24 ++++++++++++----- xxhash_asm.go | 2 +- xxhash_other.go | 2 +- xxhash_safe.go | 2 +- xxhash_test.go | 68 +++++++++++++++++++++++++++++++++++------------- xxhash_unsafe.go | 2 +- 6 files changed, 72 insertions(+), 28 deletions(-) diff --git a/xxhash.go b/xxhash.go index a9e0d45..b949cef 100644 --- a/xxhash.go +++ b/xxhash.go @@ -33,19 +33,31 @@ type Digest struct { n int // how much of mem is used } -// New creates a new Digest that computes the 64-bit xxHash algorithm. +// New creates a new Digest with a zero seed. func New() *Digest { + return NewWithSeed(0) +} + +// NewWithSeed creates a new Digest with the given seed. +func NewWithSeed(seed uint64) *Digest { var d Digest - d.Reset() + d.ResetWithSeed(seed) return &d } // Reset clears the Digest's state so that it can be reused. +// It uses a seed value of zero. func (d *Digest) Reset() { - d.v1 = primes[0] + prime2 - d.v2 = prime2 - d.v3 = 0 - d.v4 = -primes[0] + d.ResetWithSeed(0) +} + +// ResetWithSeed clears the Digest's state so that it can be reused. +// It uses the given seed to initialize the state. +func (d *Digest) ResetWithSeed(seed uint64) { + d.v1 = seed + primes[0] + prime2 + d.v2 = seed + prime2 + d.v3 = seed + d.v4 = seed - primes[0] d.total = 0 d.n = 0 } diff --git a/xxhash_asm.go b/xxhash_asm.go index 9216e0a..78f95f2 100644 --- a/xxhash_asm.go +++ b/xxhash_asm.go @@ -6,7 +6,7 @@ package xxhash -// Sum64 computes the 64-bit xxHash digest of b. +// Sum64 computes the 64-bit xxHash digest of b with a zero seed. // //go:noescape func Sum64(b []byte) uint64 diff --git a/xxhash_other.go b/xxhash_other.go index 26df13b..118e49e 100644 --- a/xxhash_other.go +++ b/xxhash_other.go @@ -3,7 +3,7 @@ package xxhash -// Sum64 computes the 64-bit xxHash digest of b. +// Sum64 computes the 64-bit xxHash digest of b with a zero seed. func Sum64(b []byte) uint64 { // A simpler version would be // d := New() diff --git a/xxhash_safe.go b/xxhash_safe.go index e86f1b5..05f5e7d 100644 --- a/xxhash_safe.go +++ b/xxhash_safe.go @@ -5,7 +5,7 @@ package xxhash -// Sum64String computes the 64-bit xxHash digest of s. +// Sum64String computes the 64-bit xxHash digest of s with a zero seed. func Sum64String(s string) uint64 { return Sum64([]byte(s)) } diff --git a/xxhash_test.go b/xxhash_test.go index 6330f19..8e2f456 100644 --- a/xxhash_test.go +++ b/xxhash_test.go @@ -4,45 +4,60 @@ import ( "bytes" "encoding/binary" "fmt" + "math" "strings" "testing" ) func TestAll(t *testing.T) { + // Exactly 63 characters, which exercises all code paths. + const s63 = "Call me Ishmael. Some years ago--never mind how long precisely-" for _, tt := range []struct { - name string input string + seed uint64 want uint64 }{ - {"empty", "", 0xef46db3751d8e999}, - {"a", "a", 0xd24ec4f1a98c6e5b}, - {"as", "as", 0x1c330fb2d66be179}, - {"asd", "asd", 0x631c37ce72a97393}, - {"asdf", "asdf", 0x415872f599cea71e}, - { - "len=63", - // Exactly 63 characters, which exercises all code paths. - "Call me Ishmael. Some years ago--never mind how long precisely-", - 0x02a2e85470d6fd96, - }, + {"", 0, 0xef46db3751d8e999}, + {"a", 0, 0xd24ec4f1a98c6e5b}, + {"as", 0, 0x1c330fb2d66be179}, + {"asd", 0, 0x631c37ce72a97393}, + {"asdf", 0, 0x415872f599cea71e}, + {s63, 0, 0x02a2e85470d6fd96}, + + {"", 123, 0xe0db84de91f3e198}, + {"asdf", math.MaxUint64, 0x9a2fd8473be539b6}, + {s63, 54321, 0x1736d186daf5d1cd}, } { lastChunkSize := len(tt.input) if lastChunkSize == 0 { lastChunkSize = 1 } + var name string + if tt.input == "" { + name = "input=empty" + } else if len(tt.input) > 10 { + name = fmt.Sprintf("input=len-%d", len(tt.input)) + } else { + name = fmt.Sprintf("input=%q", tt.input) + } + if tt.seed != 0 { + name += fmt.Sprintf(",seed=%d", tt.seed) + } for chunkSize := 1; chunkSize <= lastChunkSize; chunkSize++ { - name := fmt.Sprintf("%s,chunkSize=%d", tt.name, chunkSize) + name := fmt.Sprintf("%s,chunkSize=%d", name, chunkSize) t.Run(name, func(t *testing.T) { - testDigest(t, tt.input, chunkSize, tt.want) + testDigest(t, tt.input, tt.seed, chunkSize, tt.want) }) } - t.Run(tt.name, func(t *testing.T) { testSum(t, tt.input, tt.want) }) + if tt.seed == 0 { + t.Run(name, func(t *testing.T) { testSum(t, tt.input, tt.want) }) + } } } -func testDigest(t *testing.T, input string, chunkSize int, want uint64) { - d := New() - ds := New() // uses WriteString +func testDigest(t *testing.T, input string, seed uint64, chunkSize int, want uint64) { + d := NewWithSeed(seed) + ds := NewWithSeed(seed) // uses WriteString for i := 0; i < len(input); i += chunkSize { chunk := input[i:] if len(chunk) > chunkSize { @@ -96,6 +111,23 @@ func TestReset(t *testing.T) { } } +func TestResetWithSeed(t *testing.T) { + parts := []string{"The quic", "k br", "o", "wn fox jumps", " ov", "er the lazy ", "dog."} + d := NewWithSeed(123) + for _, part := range parts { + d.Write([]byte(part)) + } + h0 := d.Sum64() + + d.ResetWithSeed(123) + d.Write([]byte(strings.Join(parts, ""))) + h1 := d.Sum64() + + if h0 != h1 { + t.Errorf("0x%x != 0x%x", h0, h1) + } +} + func TestBinaryMarshaling(t *testing.T) { d := New() d.WriteString("abc") diff --git a/xxhash_unsafe.go b/xxhash_unsafe.go index 1c1638f..cf9d42a 100644 --- a/xxhash_unsafe.go +++ b/xxhash_unsafe.go @@ -33,7 +33,7 @@ import ( // // See https://github.com/golang/go/issues/42739 for discussion. -// Sum64String computes the 64-bit xxHash digest of s. +// Sum64String computes the 64-bit xxHash digest of s with a zero seed. // It may be faster than Sum64([]byte(s)) by avoiding a copy. func Sum64String(s string) uint64 { b := *(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)}))