-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This commit adds a `crbytes` library with a `CommonPrefix` method. It has a generic implementation that compares 8 bytes at a time and a native `amd64` implementation that is based on the `bytes.Compare` implementation.
- Loading branch information
1 parent
f1e2f24
commit 264e445
Showing
6 changed files
with
427 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// Copyright 2024 The Cockroach Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
// implied. See the License for the specific language governing | ||
// permissions and limitations under the License. | ||
|
||
package crbytes | ||
|
||
import ( | ||
"encoding/binary" | ||
) | ||
|
||
// commonPrefixGeneric returns the number of leading bytes that a and b have in
// common. It is the portable implementation used on architectures without a
// native one, and it lives in this file (rather than common_generic.go) so the
// benchmarks can call it even when a native implementation is available.
func commonPrefixGeneric(a, b []byte) int {
	limit := min(len(a), len(b))
	i := 0
	// Word phase: advance eight bytes at a time while whole 8-byte words are
	// available in both slices and compare equal.
	for ; i+8 <= limit; i += 8 {
		if binary.LittleEndian.Uint64(a[i:]) != binary.LittleEndian.Uint64(b[i:]) {
			break
		}
	}
	// Byte phase: locate the exact mismatch inside the differing word, or
	// finish off the tail that is shorter than a word.
	for ; i < limit; i++ {
		if a[i] != b[i] {
			break
		}
	}
	return i
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
// Copyright 2018 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in licenses/BSD-golang.txt. | ||
|
||
// This code is based on compare_amd64.s from Go 1.12.5. | ||
|
||
// CommonPrefix implements func CommonPrefix(a, b []byte) int: it stores in | ||
// ret the number of leading bytes that are equal in a and b. | ||
// | ||
// Frame layout ($0-56): no locals; a's slice header starts at 0(FP), b's at | ||
// 24(FP), and the result is written to 48(FP). | ||
// | ||
// Register use: SI/DI point at the next unchecked bytes of a/b (on the small | ||
// path they are reused to hold the loaded bytes), R8 counts the bytes still | ||
// to be checked and DX holds min(alen, blen). | ||
TEXT ·CommonPrefix(SB),$0-56 | ||
MOVQ a_base+0(FP), SI | ||
MOVQ a_len+8(FP), BX | ||
MOVQ b_base+24(FP), DI | ||
MOVQ b_len+32(FP), DX | ||
|
||
CMPQ BX, DX | ||
MOVQ DX, R8 | ||
CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare | ||
// Throughout this function, DX remembers the original min(alen, blen) and | ||
// R8 is the number of bytes we still need to compare (with bytes 0 to | ||
// DX-R8 known to match). | ||
MOVQ R8, DX | ||
// Fewer than 8 bytes to compare: take the small path. | ||
CMPQ R8, $8 | ||
JB small | ||
|
||
// 8 to 63 bytes: 16 bytes per iteration (loop); 64 or more: 64 bytes per | ||
// iteration (big_loop). | ||
CMPQ R8, $63 | ||
JBE loop | ||
JMP big_loop | ||
// NOTE(review): this RET follows an unconditional JMP and looks unreachable. | ||
RET | ||
|
||
// loop checks 16 bytes at a time. | ||
loop: | ||
CMPQ R8, $16 | ||
JB _0through15 | ||
MOVOU (SI), X0 | ||
MOVOU (DI), X1 | ||
// PCMPEQB sets each byte of X1 to 0xff where the two inputs are equal; | ||
// PMOVMSKB gathers the top bit of each byte into the low 16 bits of AX. | ||
PCMPEQB X0, X1 | ||
PMOVMSKB X1, AX | ||
XORQ $0xffff, AX // convert EQ to NE | ||
JNE diff16 // branch if at least one byte is not equal | ||
ADDQ $16, SI | ||
ADDQ $16, DI | ||
SUBQ $16, R8 | ||
JMP loop | ||
|
||
// diff64/diff48/diff32 are entered from big_loop, which does not update R8 | ||
// between its four 16-byte chunks; each subtracts the offset of the chunk | ||
// that mismatched (48, 32 or 16) from R8 before sharing the diff16 tail. | ||
diff64: | ||
SUBQ $48, R8 | ||
JMP diff16 | ||
diff48: | ||
SUBQ $32, R8 | ||
JMP diff16 | ||
diff32: | ||
SUBQ $16, R8 | ||
// AX = bit mask of differences | ||
diff16: | ||
BSFQ AX, BX // index of first byte that differs | ||
SUBQ BX, R8 | ||
|
||
// R8 now counts the bytes from the first mismatch to the end, so DX-R8 is | ||
// the length of the common prefix. | ||
SUBQ R8, DX | ||
MOVQ DX, ret+48(FP) | ||
RET | ||
|
||
_0through15: // R8 < 16, DX >= 8 | ||
CMPQ R8, $8 | ||
JBE _0through8 | ||
// More than 8 bytes left: compare the first 8 of them. | ||
MOVQ (SI), AX | ||
MOVQ (DI), CX | ||
CMPQ AX, CX | ||
JNE diff8 | ||
_0through8: | ||
// Load last 8 bytes of both. | ||
// The window ends at the last byte to compare; because DX >= 8 it starts at | ||
// or after the beginning of the data, and any bytes it re-reads are already | ||
// known to match. | ||
MOVQ -8(SI)(R8*1), AX | ||
MOVQ -8(DI)(R8*1), CX | ||
CMPQ AX, CX | ||
JEQ allsame | ||
// Make diff8 treat the loaded window as the final 8 bytes, so that DX-R8 is | ||
// the offset at which the window starts. | ||
MOVQ $8, R8 | ||
|
||
// AX and CX contain parts of a and b that differ. | ||
diff8: | ||
BSWAPQ AX // reverse order of bytes | ||
BSWAPQ CX | ||
XORQ AX, CX | ||
BSRQ CX, CX // index of highest bit difference | ||
SHRQ $3, CX // index of highest byte difference | ||
|
||
// After the byte swap the first byte in memory is the most significant one, | ||
// so the first mismatch is 7-CX bytes into the window starting at DX-R8. | ||
SUBQ R8, DX | ||
ADDQ $7, DX | ||
SUBQ CX, DX | ||
MOVQ DX, ret+48(FP) | ||
RET | ||
|
||
// DX < 8 | ||
small: | ||
LEAQ (R8*8), CX // bytes left -> bits left | ||
NEGQ CX // - bits left (== 64 - bits left mod 64) | ||
// NEGQ leaves the zero flag set only when R8 was 0: nothing to compare. | ||
JEQ allsame | ||
|
||
// load bytes of a into high bytes of SI | ||
// If the low byte of the address is above 0xf8, the 8 bytes ending at SI+R8 | ||
// are loaded instead of the 8 bytes starting at SI. | ||
// NOTE(review): presumably this keeps the 8-byte load from running into the | ||
// next page -- confirm. | ||
CMPB SI, $0xf8 | ||
JA si_high | ||
MOVQ (SI), SI | ||
JMP si_finish | ||
si_high: | ||
MOVQ -8(SI)(R8*1), SI | ||
SHRQ CX, SI | ||
si_finish: | ||
SHLQ CX, SI | ||
|
||
// load bytes of b into high bytes of DI | ||
// Same address check and load sequence as for a above. | ||
CMPB DI, $0xf8 | ||
JA di_high | ||
MOVQ (DI), DI | ||
JMP di_finish | ||
di_high: | ||
MOVQ -8(DI)(R8*1), DI | ||
SHRQ CX, DI | ||
di_finish: | ||
SHLQ CX, DI | ||
|
||
BSWAPQ SI // reverse order of bytes | ||
BSWAPQ DI | ||
XORQ SI, DI // find bit differences | ||
JEQ allsame | ||
BSRQ DI, CX // index of highest bit difference | ||
SHRQ $3, CX // index of highest byte difference | ||
// The first mismatching byte is at offset DX-1-CX. | ||
DECQ DX | ||
SUBQ CX, DX | ||
MOVQ DX, ret+48(FP) | ||
RET | ||
|
||
// allsame: every one of the min(alen, blen) bytes matched; return DX. | ||
allsame: | ||
MOVQ DX, ret+48(FP) | ||
RET | ||
|
||
// big_loop checks 64 bytes per iteration as four 16-byte chunks, using the | ||
// same compare/mask sequence as loop. R8 is only updated once all 64 match. | ||
big_loop: | ||
MOVOU (SI), X0 | ||
MOVOU (DI), X1 | ||
PCMPEQB X0, X1 | ||
PMOVMSKB X1, AX | ||
XORQ $0xffff, AX | ||
JNE diff16 | ||
|
||
MOVOU 16(SI), X0 | ||
MOVOU 16(DI), X1 | ||
PCMPEQB X0, X1 | ||
PMOVMSKB X1, AX | ||
XORQ $0xffff, AX | ||
JNE diff32 | ||
|
||
MOVOU 32(SI), X0 | ||
MOVOU 32(DI), X1 | ||
PCMPEQB X0, X1 | ||
PMOVMSKB X1, AX | ||
XORQ $0xffff, AX | ||
JNE diff48 | ||
|
||
MOVOU 48(SI), X0 | ||
MOVOU 48(DI), X1 | ||
PCMPEQB X0, X1 | ||
PMOVMSKB X1, AX | ||
XORQ $0xffff, AX | ||
JNE diff64 | ||
|
||
// All 64 bytes matched: advance, then stay in big_loop while more than 64 | ||
// bytes remain and otherwise finish in loop. | ||
ADDQ $64, SI | ||
ADDQ $64, DI | ||
SUBQ $64, R8 | ||
CMPQ R8, $64 | ||
JBE loop | ||
JMP big_loop |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
// Copyright 2024 The Cockroach Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
// implied. See the License for the specific language governing | ||
// permissions and limitations under the License. | ||
|
||
package crbytes | ||
|
||
import ( | ||
"bytes" | ||
"math/rand" | ||
"slices" | ||
"testing" | ||
) | ||
|
||
// Sample benchmark results on linux/amd64, cpu: Intel(R) Xeon(R) CPU @ 2.80GHz: | ||
// | ||
// name time/op | ||
// CommonPrefix/small/crbytes-24 8.54ns ± 1% | ||
// CommonPrefix/small/generic-24 11.0ns ± 1% | ||
// CommonPrefix/small/naive-24 13.6ns ± 1% | ||
// CommonPrefix/medium/crbytes-24 13.8ns ± 2% | ||
// CommonPrefix/medium/generic-24 26.3ns ± 2% | ||
// CommonPrefix/medium/naive-24 31.7ns ± 2% | ||
// CommonPrefix/large/crbytes-24 153ns ± 2% | ||
// CommonPrefix/large/generic-24 362ns ± 2% | ||
// CommonPrefix/large/naive-24 755ns ± 1% | ||
|
||
func BenchmarkCommonPrefix(b *testing.B) { | ||
small := lexicographicSet(4, 16) | ||
medium := lexicographicSet(10, 100) | ||
large := lexicographicSet(1000, 10000) | ||
b.Run("small", func(b *testing.B) { | ||
runBenchComparison(b, small) | ||
}) | ||
b.Run("medium", func(b *testing.B) { | ||
runBenchComparison(b, medium) | ||
}) | ||
b.Run("large", func(b *testing.B) { | ||
runBenchComparison(b, large) | ||
}) | ||
} | ||
|
||
func runBenchComparison(b *testing.B, input [][]byte) { | ||
b.Run("crbytes", func(b *testing.B) { | ||
runBench(b, input, CommonPrefix) | ||
}) | ||
b.Run("generic", func(b *testing.B) { | ||
runBench(b, input, commonPrefixGeneric) | ||
}) | ||
b.Run("naive", func(b *testing.B) { | ||
runBench(b, input, commonPrefixNaive) | ||
}) | ||
} | ||
|
||
// runBench calls impl b.N times on consecutive pairs of input, i.e.
// (input[0], input[1]), (input[1], input[2]), ..., wrapping from the last
// element back to the first. The results are summed and their average is
// logged, which also keeps the calls' results in use.
func runBench(b *testing.B, input [][]byte, impl func(a, b []byte) int) {
	var total int
	cur := 0
	for iter := 0; iter < b.N; iter++ {
		// Index of the slice paired with input[cur]; wraps around at the end.
		succ := cur + 1
		if succ >= len(input) {
			succ = 0
		}
		total += impl(input[cur], input[succ])
		cur = succ
	}
	b.Logf("average result: %d\n", total/b.N)
}
|
||
// lexicographicSet returns a lexicographically ordered list of byte slices | ||
// which all have a common prefix of length minLength, with random bytes (with | ||
// alphabet size 2) following up to maxLength. | ||
func lexicographicSet(minLength, maxLength int) [][]byte { | ||
const n = 10_000 | ||
const alphabet = 2 | ||
prefix := genBytes(minLength, alphabet) | ||
|
||
result := make([][]byte, n) | ||
for i := range result { | ||
result[i] = slices.Concat(prefix, genBytes(rand.Intn(maxLength-minLength+1), alphabet)) | ||
} | ||
slices.SortFunc(result, bytes.Compare) | ||
return result | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// Copyright 2024 The Cockroach Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
// implied. See the License for the specific language governing | ||
// permissions and limitations under the License. | ||
|
||
//go:build !amd64 | ||
|
||
package crbytes | ||
|
||
// CommonPrefix returns the longest prefix shared by the two slices. | ||
func CommonPrefix(a, b []byte) int { | ||
return commonPrefixGeneric(a, b) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// Copyright 2024 The Cockroach Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
// implied. See the License for the specific language governing | ||
// permissions and limitations under the License. | ||
|
||
//go:build amd64 | ||
|
||
package crbytes | ||
|
||
// CommonPrefix returns the length of the longest prefix shared by the two | ||
// slices, i.e. the number of leading bytes that are equal in a and b. | ||
// | ||
// The declaration has no body: in amd64 builds the implementation is the | ||
// assembly routine ·CommonPrefix. | ||
// | ||
//go:noescape | ||
func CommonPrefix(a, b []byte) int |
Oops, something went wrong.