From 264e44561569545abf77602c67630e42ed90a701 Mon Sep 17 00:00:00 2001
From: Radu Berinde
Date: Sun, 4 Aug 2024 04:43:01 +0300
Subject: [PATCH] crbytes: CommonPrefix

This commit adds a `crbytes` library with a `CommonPrefix` method. It
has a generic implementation that compares 8 bytes at a time and a
native `amd64` implementation that is based on the `bytes.Compare`
implementation.
---
 crbytes/common_prefix.go            |  38 +++++++
 crbytes/common_prefix_amd64.s       | 166 ++++++++++++++++++++++++++++
 crbytes/common_prefix_bench_test.go |  93 ++++++++++++++++
 crbytes/common_prefix_generic.go    |  22 ++++
 crbytes/common_prefix_native.go     |  22 ++++
 crbytes/common_prefix_test.go       |  86 ++++++++++++++
 6 files changed, 427 insertions(+)
 create mode 100644 crbytes/common_prefix.go
 create mode 100644 crbytes/common_prefix_amd64.s
 create mode 100644 crbytes/common_prefix_bench_test.go
 create mode 100644 crbytes/common_prefix_generic.go
 create mode 100644 crbytes/common_prefix_native.go
 create mode 100644 crbytes/common_prefix_test.go

diff --git a/crbytes/common_prefix.go b/crbytes/common_prefix.go
new file mode 100644
index 0000000..2d94185
--- /dev/null
+++ b/crbytes/common_prefix.go
@@ -0,0 +1,38 @@
+// Copyright 2024 The Cockroach Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+package crbytes
+
+import (
+	"encoding/binary"
+)
+
+// commonPrefixGeneric returns the length of the longest common prefix of a and
+// b. It is used for architectures without a native implementation, and is
+// defined here rather than in common_prefix_generic.go so that the benchmarking
+// code can have access to it even when there's a native implementation available.
+func commonPrefixGeneric(a, b []byte) int {
+	asUint64 := func(data []byte, i int) uint64 {
+		return binary.LittleEndian.Uint64(data[i:])
+	}
+	var shared int
+	n := min(len(a), len(b))
+	for shared < n-7 && asUint64(a, shared) == asUint64(b, shared) { // 8 bytes at a time
+		shared += 8
+	}
+	for shared < n && a[shared] == b[shared] { // remaining bytes, one at a time
+		shared++
+	}
+	return shared
+}
diff --git a/crbytes/common_prefix_amd64.s b/crbytes/common_prefix_amd64.s
new file mode 100644
index 0000000..8f673a0
--- /dev/null
+++ b/crbytes/common_prefix_amd64.s
@@ -0,0 +1,166 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in licenses/BSD-golang.txt.
+
+// This code is based on compare_amd64.s from Go 1.12.5.
+
+TEXT ·CommonPrefix(SB),$0-56
+	MOVQ	a_base+0(FP), SI
+	MOVQ	a_len+8(FP), BX
+	MOVQ	b_base+24(FP), DI
+	MOVQ	b_len+32(FP), DX
+
+	CMPQ	BX, DX
+	MOVQ	DX, R8
+	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
+	// Throughout this function, DX remembers the original min(alen, blen) and
+	// R8 is the number of bytes we still need to compare (with bytes 0 to
+	// DX-R8 known to match).
+	MOVQ	R8, DX
+	CMPQ	R8, $8
+	JB	small
+
+	CMPQ	R8, $63
+	JBE	loop
+	JMP	big_loop
+	RET // unreachable: the JMP above is unconditional
+
+// loop checks 16 bytes at a time.
+loop:
+	CMPQ	R8, $16
+	JB	_0through15
+	MOVOU	(SI), X0
+	MOVOU	(DI), X1
+	PCMPEQB	X0, X1
+	PMOVMSKB	X1, AX
+	XORQ	$0xffff, AX	// convert EQ to NE
+	JNE	diff16	// branch if at least one byte is not equal
+	ADDQ	$16, SI
+	ADDQ	$16, DI
+	SUBQ	$16, R8
+	JMP	loop
+
+diff64: // mismatch in the 4th 16-byte chunk of big_loop: 48 bytes before it matched
+	SUBQ	$48, R8
+	JMP	diff16
+diff48: // mismatch in the 3rd 16-byte chunk of big_loop: 32 bytes before it matched
+	SUBQ	$32, R8
+	JMP	diff16
+diff32: // mismatch in the 2nd 16-byte chunk of big_loop: 16 bytes before it matched
+	SUBQ	$16, R8
+	// AX = bit mask of differences
+diff16:
+	BSFQ	AX, BX	// index of first byte that differs
+	SUBQ	BX, R8
+
+	SUBQ	R8, DX
+	MOVQ	DX, ret+48(FP)
+	RET
+
+_0through15: // R8 < 16, DX >= 8
+	CMPQ	R8, $8
+	JBE	_0through8
+	MOVQ	(SI), AX
+	MOVQ	(DI), CX
+	CMPQ	AX, CX
+	JNE	diff8
+_0through8:
+	// Load last 8 bytes of both.
+	MOVQ	-8(SI)(R8*1), AX
+	MOVQ	-8(DI)(R8*1), CX
+	CMPQ	AX, CX
+	JEQ	allsame
+	MOVQ	$8, R8
+
+	// AX and CX contain parts of a and b that differ.
+diff8:
+	BSWAPQ	AX	// reverse order of bytes
+	BSWAPQ	CX
+	XORQ	AX, CX
+	BSRQ	CX, CX	// index of highest bit difference
+	SHRQ	$3, CX	// index of highest byte difference
+
+	SUBQ	R8, DX
+	ADDQ	$7, DX
+	SUBQ	CX, DX
+	MOVQ	DX, ret+48(FP)
+	RET
+
+	// DX < 8
+small:
+	LEAQ	(R8*8), CX	// bytes left -> bits left
+	NEGQ	CX		// - bits left (== 64 - bits left mod 64)
+	JEQ	allsame
+
+	// load bytes of a into high bytes of SI
+	CMPB	SI, $0xf8
+	JA	si_high
+	MOVQ	(SI), SI
+	JMP	si_finish
+si_high:
+	MOVQ	-8(SI)(R8*1), SI
+	SHRQ	CX, SI
+si_finish:
+	SHLQ	CX, SI
+
+	// load bytes of b into high bytes of DI
+	CMPB	DI, $0xf8
+	JA	di_high
+	MOVQ	(DI), DI
+	JMP	di_finish
+di_high:
+	MOVQ	-8(DI)(R8*1), DI
+	SHRQ	CX, DI
+di_finish:
+	SHLQ	CX, DI
+
+	BSWAPQ	SI	// reverse order of bytes
+	BSWAPQ	DI
+	XORQ	SI, DI	// find bit differences
+	JEQ	allsame
+	BSRQ	DI, CX	// index of highest bit difference
+	SHRQ	$3, CX	// index of highest byte difference
+	DECQ	DX
+	SUBQ	CX, DX
+	MOVQ	DX, ret+48(FP)
+	RET
+
+allsame: // no difference found: return DX = min(alen, blen)
+	MOVQ	DX, ret+48(FP)
+	RET
+
+big_loop: // checks 64 bytes per iteration, as four 16-byte compares
+	MOVOU	(SI), X0
+	MOVOU	(DI), X1
+	PCMPEQB	X0, X1
+	PMOVMSKB	X1, AX
+	XORQ	$0xffff, AX
+	JNE	diff16
+
+	MOVOU	16(SI), X0
+	MOVOU	16(DI), X1
+	PCMPEQB	X0, X1
+	PMOVMSKB	X1, AX
+	XORQ	$0xffff, AX
+	JNE	diff32
+
+	MOVOU	32(SI), X0
+	MOVOU	32(DI), X1
+	PCMPEQB	X0, X1
+	PMOVMSKB	X1, AX
+	XORQ	$0xffff, AX
+	JNE	diff48
+
+	MOVOU	48(SI), X0
+	MOVOU	48(DI), X1
+	PCMPEQB	X0, X1
+	PMOVMSKB	X1, AX
+	XORQ	$0xffff, AX
+	JNE	diff64
+
+	ADDQ	$64, SI
+	ADDQ	$64, DI
+	SUBQ	$64, R8
+	CMPQ	R8, $64
+	JBE	loop
+	JMP	big_loop
diff --git a/crbytes/common_prefix_bench_test.go b/crbytes/common_prefix_bench_test.go
new file mode 100644
index 0000000..632d7c7
--- /dev/null
+++ b/crbytes/common_prefix_bench_test.go
@@ -0,0 +1,93 @@
+// Copyright 2024 The Cockroach Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+package crbytes
+
+import (
+	"bytes"
+	"math/rand"
+	"slices"
+	"testing"
+)
+
+// Sample benchmark results on linux/amd64, cpu: Intel(R) Xeon(R) CPU @ 2.80GHz:
+//
+// name                            time/op
+// CommonPrefix/small/crbytes-24   8.54ns ± 1%
+// CommonPrefix/small/generic-24   11.0ns ± 1%
+// CommonPrefix/small/naive-24     13.6ns ± 1%
+// CommonPrefix/medium/crbytes-24  13.8ns ± 2%
+// CommonPrefix/medium/generic-24  26.3ns ± 2%
+// CommonPrefix/medium/naive-24    31.7ns ± 2%
+// CommonPrefix/large/crbytes-24    153ns ± 2%
+// CommonPrefix/large/generic-24    362ns ± 2%
+// CommonPrefix/large/naive-24      755ns ± 1%
+
+func BenchmarkCommonPrefix(b *testing.B) {
+	small := lexicographicSet(4, 16)
+	medium := lexicographicSet(10, 100)
+	large := lexicographicSet(1000, 10000)
+	b.Run("small", func(b *testing.B) {
+		runBenchComparison(b, small)
+	})
+	b.Run("medium", func(b *testing.B) {
+		runBenchComparison(b, medium)
+	})
+	b.Run("large", func(b *testing.B) {
+		runBenchComparison(b, large)
+	})
+}
+
+func runBenchComparison(b *testing.B, input [][]byte) {
+	b.Run("crbytes", func(b *testing.B) {
+		runBench(b, input, CommonPrefix)
+	})
+	b.Run("generic", func(b *testing.B) {
+		runBench(b, input, commonPrefixGeneric)
+	})
+	b.Run("naive", func(b *testing.B) {
+		runBench(b, input, commonPrefixNaive)
+	})
+}
+
+func runBench(b *testing.B, input [][]byte, impl func(a, b []byte) int) {
+	n := len(input)
+	j := 0
+	var sum int
+	for i := 0; i < b.N; i++ {
+		next := j + 1
+		if next >= n {
+			next = 0
+		}
+		sum += impl(input[j], input[next]) // each entry vs. its successor, wrapping around
+		j = next
+	}
+	b.Logf("average result: %d\n", sum/b.N)
+}
+
+// lexicographicSet returns a lexicographically ordered list of 10,000 byte
+// slices which all share a common prefix of length minLength, followed by up to
+// maxLength-minLength random bytes (drawn from an alphabet of size 2).
+func lexicographicSet(minLength, maxLength int) [][]byte {
+	const n = 10_000
+	const alphabet = 2
+	prefix := genBytes(minLength, alphabet)
+
+	result := make([][]byte, n)
+	for i := range result {
+		result[i] = slices.Concat(prefix, genBytes(rand.Intn(maxLength-minLength+1), alphabet))
+	}
+	slices.SortFunc(result, bytes.Compare)
+	return result
+}
diff --git a/crbytes/common_prefix_generic.go b/crbytes/common_prefix_generic.go
new file mode 100644
index 0000000..56b0cf9
--- /dev/null
+++ b/crbytes/common_prefix_generic.go
@@ -0,0 +1,22 @@
+// Copyright 2024 The Cockroach Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+//go:build !amd64
+
+package crbytes
+
+// CommonPrefix returns the length of the longest prefix shared by the two slices.
+func CommonPrefix(a, b []byte) int {
+	return commonPrefixGeneric(a, b)
+}
diff --git a/crbytes/common_prefix_native.go b/crbytes/common_prefix_native.go
new file mode 100644
index 0000000..907b0a7
--- /dev/null
+++ b/crbytes/common_prefix_native.go
@@ -0,0 +1,22 @@
+// Copyright 2024 The Cockroach Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+//go:build amd64
+
+package crbytes
+
+// CommonPrefix returns the length of the longest prefix shared by the two slices.
+//
+//go:noescape
+func CommonPrefix(a, b []byte) int
diff --git a/crbytes/common_prefix_test.go b/crbytes/common_prefix_test.go
new file mode 100644
index 0000000..71cd96c
--- /dev/null
+++ b/crbytes/common_prefix_test.go
@@ -0,0 +1,86 @@
+// Copyright 2024 The Cockroach Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+package crbytes
+
+import (
+	"bytes"
+	"math/rand"
+	"testing"
+)
+
+func TestCommonPrefixAllLengths(t *testing.T) {
+	// For each length l up to 256 and each k <= l, a and b first differ at index k (or are equal when k == l).
+	for l := 0; l <= 256; l++ {
+		for k := 0; k <= l; k++ {
+			a := bytes.Repeat([]byte("x"), l)
+			b := bytes.Repeat([]byte("x"), l)
+			if k < l {
+				b[k] = '0'
+			}
+			if res := CommonPrefix(a, b); res != k {
+				t.Errorf("length=%d expected=%d result=%d\n", l, k, res)
+			}
+			// Always test the generic implementation too.
+			if res := commonPrefixGeneric(a, b); res != k {
+				t.Errorf("length=%d expected=%d result=%d\n", l, k, res)
+			}
+		}
+	}
+}
+
+func TestCommonPrefixRand(t *testing.T) {
+	for _, tc := range []struct {
+		maxLen   int
+		alphabet int
+	}{
+		{maxLen: 4, alphabet: 2},
+		{maxLen: 100, alphabet: 2},
+		{maxLen: 200, alphabet: 2},
+		{maxLen: 10, alphabet: 4},
+		{maxLen: 500, alphabet: 4},
+		{maxLen: 10, alphabet: 26},
+		{maxLen: 500, alphabet: 26},
+	} {
+		for n := 0; n < 1000; n++ {
+			a := genBytes(rand.Intn(tc.maxLen+1), tc.alphabet)
+			b := genBytes(rand.Intn(tc.maxLen+1), tc.alphabet)
+			expected := commonPrefixNaive(a, b)
+			if res := CommonPrefix(a, b); res != expected {
+				t.Errorf("%q %q expected=%d result=%d\n", a, b, expected, res)
+			}
+			// Always test the generic implementation too.
+			if res := commonPrefixGeneric(a, b); res != expected {
+				t.Errorf("%q %q expected=%d result=%d\n", a, b, expected, res)
+			}
+		}
+	}
+}
+
+func commonPrefixNaive(a, b []byte) int {
+	n := min(len(a), len(b))
+	i := 0
+	for i < n && a[i] == b[i] {
+		i++
+	}
+	return i
+}
+
+func genBytes(length int, alphabet int) []byte {
+	a := make([]byte, length)
+	for i := range a {
+		a[i] = 'a' + byte(rand.Intn(alphabet))
+	}
+	return a
+}