Skip to content

Commit

Permalink
crbytes: CommonPrefix
Browse files Browse the repository at this point in the history
This commit adds a `crbytes` library with a `CommonPrefix` method. It
has a generic implementation that compares 8 bytes at a time and a
native `amd64` implementation that is based on the `bytes.Compare`
implementation.
  • Loading branch information
RaduBerinde committed Aug 4, 2024
1 parent f1e2f24 commit 264e445
Show file tree
Hide file tree
Showing 6 changed files with 427 additions and 0 deletions.
38 changes: 38 additions & 0 deletions crbytes/common_prefix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2024 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package crbytes

import (
"encoding/binary"
)

// commonPrefixGeneric returns the number of leading bytes that a and b have in
// common. It is the portable fallback for architectures that lack a native
// implementation, and it lives in this file (not common_generic.go) so that
// the benchmarks can reach it even on platforms where a native version is
// used.
func commonPrefixGeneric(a, b []byte) int {
	limit := min(len(a), len(b))
	prefixLen := 0
	// Fast path: compare one 8-byte word at a time for as long as both slices
	// still have a full word available.
	for ; prefixLen+8 <= limit; prefixLen += 8 {
		wordA := binary.LittleEndian.Uint64(a[prefixLen:])
		wordB := binary.LittleEndian.Uint64(b[prefixLen:])
		if wordA != wordB {
			break
		}
	}
	// Slow path: locate the exact mismatch inside the differing word, or walk
	// the tail that is shorter than a word.
	for ; prefixLen < limit; prefixLen++ {
		if a[prefixLen] != b[prefixLen] {
			break
		}
	}
	return prefixLen
}
166 changes: 166 additions & 0 deletions crbytes/common_prefix_amd64.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in licenses/BSD-golang.txt.

// This code is based on compare_amd64.s from Go 1.12.5.

// CommonPrefix computes the number of leading bytes at which the two byte
// slices a and b are equal and stores it in ret+48(FP).
//
// Arguments are read from the frame: a's base/len at 0(FP)/8(FP) and b's
// base/len at 24(FP)/32(FP).
//
// Register usage:
//   SI, DI  current read positions in a and b (advanced as chunks are found
//           equal; overwritten with loaded data in the "small" path)
//   DX      min(len(a), len(b)), later reused to build the result
//   R8      number of bytes still to be compared
//   AX, CX  scratch (difference masks / loaded words / shift counts)
TEXT ·CommonPrefix(SB),$0-56
MOVQ a_base+0(FP), SI
MOVQ a_len+8(FP), BX
MOVQ b_base+24(FP), DI
MOVQ b_len+32(FP), DX

CMPQ BX, DX
MOVQ DX, R8
CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
// Throughout this function, DX remembers the original min(alen, blen) and
// R8 is the number of bytes we still need to compare (the first DX-R8 bytes
// are known to match).
MOVQ R8, DX
CMPQ R8, $8
JB small // fewer than 8 bytes to compare

CMPQ R8, $63
JBE loop // 8..63 bytes: 16 bytes per iteration
JMP big_loop // 64 bytes or more: 64 bytes per iteration
RET // not reached: the JMP above is unconditional

// loop checks 16 bytes at a time.
loop:
CMPQ R8, $16
JB _0through15
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX // convert EQ to NE
JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI
ADDQ $16, DI
SUBQ $16, R8
JMP loop

// diff64/diff48/diff32 are reached from big_loop when the difference is in the
// 4th/3rd/2nd 16-byte chunk of the current 64-byte group. Each one subtracts
// the equal 16-byte chunks that precede the differing chunk from R8 and then
// continues at diff16.
diff64:
SUBQ $48, R8
JMP diff16
diff48:
SUBQ $32, R8
JMP diff16
diff32:
SUBQ $16, R8
// AX = bit mask of differences
diff16:
BSFQ AX, BX // index of first byte that differs
SUBQ BX, R8 // R8 = bytes remaining counted from the first differing byte

SUBQ R8, DX // DX = number of matching bytes before the difference
MOVQ DX, ret+48(FP)
RET

_0through15: // R8 < 16, DX >= 8
CMPQ R8, $8
JBE _0through8 // at most 8 bytes left: one load of the last 8 bytes covers them
// More than 8 bytes left: compare the first 8 of them.
MOVQ (SI), AX
MOVQ (DI), CX
CMPQ AX, CX
JNE diff8
_0through8:
// Load last 8 bytes of both. This window may overlap bytes that are already
// known to be equal; because DX >= 8 it never starts before the beginning of
// the data.
MOVQ -8(SI)(R8*1), AX
MOVQ -8(DI)(R8*1), CX
CMPQ AX, CX
JEQ allsame
// The loaded words start 8 bytes before the end, so diff8 must see R8 = 8.
MOVQ $8, R8

// AX and CX contain parts of a and b that differ.
// R8 is the number of bytes remaining counted from the start of those words.
diff8:
BSWAPQ AX // reverse order of bytes
BSWAPQ CX
XORQ AX, CX
BSRQ CX, CX // index of highest bit difference
SHRQ $3, CX // index of highest byte difference

// Result = (DX - R8) bytes before the loaded word + (7 - CX) equal bytes
// inside it.
SUBQ R8, DX
ADDQ $7, DX
SUBQ CX, DX
MOVQ DX, ret+48(FP)
RET

// R8 == DX < 8
small:
LEAQ (R8*8), CX // bytes left -> bits left
NEGQ CX // - bits left (== 64 - bits left mod 64)
JEQ allsame // nothing to compare

// Load bytes of a into the high bytes of SI.
// The low byte of the address is checked against 0xf8: above that, an 8-byte
// load starting at (SI) would extend past the next 256-byte boundary, so the
// 8 bytes ending at the last byte to compare are loaded instead.
// NOTE(review): presumably this avoids touching a following unmapped page, as
// in Go's compare_amd64.s -- confirm.
CMPB SI, $0xf8
JA si_high
MOVQ (SI), SI
JMP si_finish
si_high:
MOVQ -8(SI)(R8*1), SI
SHRQ CX, SI // move the wanted bytes down to the low end
si_finish:
SHLQ CX, SI // move the wanted bytes to the high end, dropping the others

// Load bytes of b into the high bytes of DI (same scheme as for a above).
CMPB DI, $0xf8
JA di_high
MOVQ (DI), DI
JMP di_finish
di_high:
MOVQ -8(DI)(R8*1), DI
SHRQ CX, DI
di_finish:
SHLQ CX, DI

BSWAPQ SI // reverse order of bytes
BSWAPQ DI
XORQ SI, DI // find bit differences
JEQ allsame
BSRQ DI, CX // index of highest bit difference
SHRQ $3, CX // index of highest byte difference
// Result = DX - 1 - CX = index of the first differing byte.
DECQ DX
SUBQ CX, DX
MOVQ DX, ret+48(FP)
RET

// Every compared byte matched: the result is min(alen, blen).
allsame:
MOVQ DX, ret+48(FP)
RET

// big_loop checks 64 bytes per iteration as four 16-byte chunks. It is only
// entered with R8 >= 64.
big_loop:
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff16

MOVOU 16(SI), X0
MOVOU 16(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff32

MOVOU 32(SI), X0
MOVOU 32(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff48

MOVOU 48(SI), X0
MOVOU 48(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff64

ADDQ $64, SI
ADDQ $64, DI
SUBQ $64, R8
CMPQ R8, $64
JBE loop // 64 or fewer bytes left: finish with the 16-byte loop
JMP big_loop
93 changes: 93 additions & 0 deletions crbytes/common_prefix_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright 2024 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package crbytes

import (
"bytes"
"math/rand"
"slices"
"testing"
)

// Sample benchmark results on linux/amd64, cpu: Intel(R) Xeon(R) CPU @ 2.80GHz:
//
// name time/op
// CommonPrefix/small/crbytes-24 8.54ns ± 1%
// CommonPrefix/small/generic-24 11.0ns ± 1%
// CommonPrefix/small/naive-24 13.6ns ± 1%
// CommonPrefix/medium/crbytes-24 13.8ns ± 2%
// CommonPrefix/medium/generic-24 26.3ns ± 2%
// CommonPrefix/medium/naive-24 31.7ns ± 2%
// CommonPrefix/large/crbytes-24 153ns ± 2%
// CommonPrefix/large/generic-24 362ns ± 2%
// CommonPrefix/large/naive-24 755ns ± 1%

// BenchmarkCommonPrefix compares the available CommonPrefix implementations on
// three generated data sets ("small", "medium" and "large") that differ in
// shared-prefix length and maximum key length.
func BenchmarkCommonPrefix(b *testing.B) {
	// All data sets are generated up front, in this order, before any
	// sub-benchmark starts.
	datasets := []struct {
		name  string
		input [][]byte
	}{
		{"small", lexicographicSet(4, 16)},
		{"medium", lexicographicSet(10, 100)},
		{"large", lexicographicSet(1000, 10000)},
	}
	for _, ds := range datasets {
		input := ds.input
		b.Run(ds.name, func(b *testing.B) {
			runBenchComparison(b, input)
		})
	}
}

func runBenchComparison(b *testing.B, input [][]byte) {
b.Run("crbytes", func(b *testing.B) {
runBench(b, input, CommonPrefix)
})
b.Run("generic", func(b *testing.B) {
runBench(b, input, commonPrefixGeneric)
})
b.Run("naive", func(b *testing.B) {
runBench(b, input, commonPrefixNaive)
})
}

// runBench invokes impl b.N times on consecutive pairs of input, i.e.
// (input[0], input[1]), (input[1], input[2]), ..., wrapping around from the
// last element back to the first. The results are summed so the calls have an
// observable effect, and the average is logged at the end.
func runBench(b *testing.B, input [][]byte, impl func(a, b []byte) int) {
	total := 0
	last := len(input) - 1
	for iter, cur := 0, 0; iter < b.N; iter++ {
		// The successor of the final element is the first one.
		succ := cur + 1
		if cur >= last {
			succ = 0
		}
		total += impl(input[cur], input[succ])
		cur = succ
	}
	b.Logf("average result: %d\n", total/b.N)
}

// lexicographicSet builds 10,000 byte slices and returns them sorted in
// bytes.Compare order. Every slice begins with the same generated prefix of
// length minLength and continues with a generated suffix whose length is
// picked at random so that the total length is at most maxLength. Both prefix
// and suffixes come from genBytes with an alphabet size of 2.
func lexicographicSet(minLength, maxLength int) [][]byte {
	const (
		count        = 10_000
		alphabetSize = 2
	)
	shared := genBytes(minLength, alphabetSize)
	// Suffix lengths are drawn from [0, maxLength-minLength].
	suffixChoices := maxLength - minLength + 1

	out := make([][]byte, 0, count)
	for len(out) < count {
		suffix := genBytes(rand.Intn(suffixChoices), alphabetSize)
		out = append(out, slices.Concat(shared, suffix))
	}
	slices.SortFunc(out, bytes.Compare)
	return out
}
22 changes: 22 additions & 0 deletions crbytes/common_prefix_generic.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2024 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

//go:build !amd64

package crbytes

// CommonPrefix returns the length of the longest prefix shared by the two
// slices, i.e. the number of leading bytes at which a and b are equal.
//
// This variant is built on every architecture other than amd64 and simply
// delegates to the portable implementation commonPrefixGeneric.
func CommonPrefix(a, b []byte) int {
return commonPrefixGeneric(a, b)
}
22 changes: 22 additions & 0 deletions crbytes/common_prefix_native.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2024 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

//go:build amd64

package crbytes

// CommonPrefix returns the length of the longest prefix shared by the two
// slices, i.e. the number of leading bytes at which a and b are equal.
//
// This declaration has no Go body; on amd64 the implementation is provided in
// assembly (common_prefix_amd64.s).
//
//go:noescape
func CommonPrefix(a, b []byte) int
Loading

0 comments on commit 264e445

Please sign in to comment.