diff --git a/crbytes/common_prefix.go b/crbytes/common_prefix.go
new file mode 100644
index 0000000..1cbf852
--- /dev/null
+++ b/crbytes/common_prefix.go
@@ -0,0 +1,36 @@
+// Copyright 2024 The Cockroach Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+package crbytes
+
+import "encoding/binary"
+
+// commonPrefixGeneric is used for architectures without a native
+// implementation. It is defined here rather than in common_prefix_generic.go
+// so that the benchmarking code can have access to it even when there's a
+// native implementation available.
+func commonPrefixGeneric(a, b []byte) int {
+	asUint64 := func(data []byte, i int) uint64 {
+		return binary.LittleEndian.Uint64(data[i:])
+	}
+	var shared int
+	n := min(len(a), len(b))
+	for shared < n-7 && asUint64(a, shared) == asUint64(b, shared) {
+		shared += 8
+	}
+	for shared < n && a[shared] == b[shared] {
+		shared++
+	}
+	return shared
+}
diff --git a/crbytes/common_prefix_amd64.s b/crbytes/common_prefix_amd64.s
new file mode 100644
index 0000000..fe7c841
--- /dev/null
+++ b/crbytes/common_prefix_amd64.s
@@ -0,0 +1,284 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in licenses/BSD-golang.txt.
+
+// This code is based on compare_amd64.s from Go 1.12.5.
+
+TEXT ·CommonPrefix(SB),$0-56
+	// SI = uintptr(unsafe.Pointer(&a[0]))
+	MOVQ a_base+0(FP), SI
+	// BX = len(a)
+	MOVQ a_len+8(FP), BX
+	// DI = uintptr(unsafe.Pointer(&b[0]))
+	MOVQ b_base+24(FP), DI
+	// DX = len(b)
+	MOVQ b_len+32(FP), DX
+
+	CMPQ BX, DX
+	MOVQ DX, R8
+	CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
+	// Throughout this function, DX remembers the original min(alen, blen) and
+	// R8 is the number of bytes we still need to compare (with bytes 0 to
+	// DX-R8 known to match).
+	MOVQ R8, DX
+	CMPQ R8, $8
+	JB small
+
+	CMPQ R8, $63
+	JBE loop
+	JMP big_loop
+	RET
+
+// loop is used when we have at most 64 bytes left to compare.
+// Invariant: R8 <= 64 and the original min(alen, blen) is at least 8 (DX >= 8).
+loop:
+	CMPQ R8, $16
+	JB _0through15
+	// X0 = a[:16]
+	MOVOU (SI), X0
+	// X1 = b[:16]
+	MOVOU (DI), X1
+	// Compare Packed Data for Equal:
+	// for i := 0; i < 16; i++ {
+	//   if X0[i] != X1[i] {
+	//     X1[i] = 0
+	//   } else {
+	//     X1[i] = 0xFF
+	//   }
+	// }
+	PCMPEQB X0, X1
+	// Move Byte Mask.
+	// AX = 0
+	// for i := 0; i < 16; i++ {
+	//   if X1[i] & 0x80 != 0 {
+	//     AX |= (1 << i)
+	//   }
+	PMOVMSKB X1, AX
+	// AX ^= 0xFFFF
+	XORQ $0xffff, AX // convert EQ to NE
+	// if AX != 0 {
+	//   goto diff16
+	// }
+	JNE diff16 // branch if at least one byte is not equal
+	// a = a[16:]
+	ADDQ $16, SI
+	// b = b[16:]
+	ADDQ $16, DI
+	// R8 -= 16
+	SUBQ $16, R8
+	JMP loop
+
+// Invariant: a[0:48] matches b[0:48] and AX contains a bit mask of differences
+// between a[48:64] and b[48:64].
+diff64:
+	// R8 -= 48
+	SUBQ $48, R8
+	JMP diff16
+
+// Invariant: a[0:32] matches b[0:32] and AX contains a bit mask of differences
+// between a[32:48] and b[32:48].
+diff48:
+	// R8 -= 32
+	SUBQ $32, R8
+	JMP diff16
+
+// Invariant: a[0:16] matches b[0:16] and AX contains a bit mask of differences
+// between a[16:32] and b[16:32].
+diff32:
+	// R8 -= 16
+	SUBQ $16, R8
+
+// Invariant: AX contains a bit mask of differences between a[:16] and b[:16].
+// AX & (1 << i) == 1 iff a[i] != b[i]
+diff16:
+	// Bit Scan Forward (return the index of the least significant set bit)
+	// BX = bits.TrailingZeros64(AX)
+	BSFQ AX, BX
+	// BX is now the number of leading bytes that matched; advance by this much.
+	// R8 -= BX
+	SUBQ BX, R8
+
+	// Return DX (original min(alen, blen)) - R8 (bytes left to compare)
+	SUBQ R8, DX
+	MOVQ DX, ret+48(FP)
+	RET
+
+// Invariants:
+// - original slices contained at least 8 bytes (DX >= 8)
+// - we have at most 15 bytes left to compare (R8 < 16)
+_0through15:
+	// if R8 <= 8 {
+	//   goto _0through8
+	// }
+	CMPQ R8, $8
+	JBE _0through8
+	// AX = a[:8]
+	MOVQ (SI), AX
+	// CX = b[:8]
+	MOVQ (DI), CX
+	// if AX != CX {
+	//   goto diff8
+	// }
+	CMPQ AX, CX
+	JNE diff8
+
+// Invariants:
+// - original slices contained at least 8 bytes (DX >= 8)
+// - we have at most 8 bytes left to compare (R8 <= 8)
+//
+// Because the backing slices have at least 8 bytes and all the bytes so far
+// matched, we can (potentially) back up to where we have exactly 8 bytes to
+// compare.
+_0through8:
+	// AX = a[R8-8:R8] (may re-read bytes that already matched)
+	MOVQ -8(SI)(R8*1), AX
+	// CX = b[R8-8:R8] (may re-read bytes that already matched)
+	MOVQ -8(DI)(R8*1), CX
+	// if AX == CX {
+	//   goto allsame
+	// }
+	CMPQ AX, CX
+	JEQ allsame
+	// R8 = 8
+	MOVQ $8, R8
+
+// Invariant: AX and CX contain corresponding 8-byte chunks of a and b, and
+// AX != CX (at least one of these bytes differs).
+diff8:
+	// CX ^= AX
+	XORQ AX, CX
+	// Bit Scan Forward (return the index of the least significant set bit)
+	// CX = bits.TrailingZeros64(CX)
+	BSFQ CX, CX
+	// CX /= 8
+	SHRQ $3, CX
+	// CX is now the 0-based index of the first byte that differs.
+	// R8 -= CX
+	SUBQ CX, R8
+
+	// Return DX (original min(alen, blen)) - R8 (bytes left to compare)
+	SUBQ R8, DX
+	MOVQ DX, ret+48(FP)
+	RET
+
+// Invariant: original min(alen, blen) < 8. DX < 8, R8 = DX.
+small:
+	// CX = R8 * 8
+	LEAQ (R8*8), CX
+	// CX = -CX
+	// We only care about the lower 6 bits of CX, so this is equivalent to:
+	// CX = (8-min(alen, blen)) * 8
+	NEGQ CX
+	JEQ allsame
+
+	// We will load 8 bytes, even though some of them are outside the slice
+	// bounds. We go out of bounds either before or after the slice depending on
+	// the value of the pointer.
+
+	// if uintptr(unsafe.Pointer(&a[0])) & 0xFF > 0xF8 {
+	//   goto si_high
+	// }
+	CMPB SI, $0xf8
+	JA si_high
+	// SI = a[:8]
+	MOVQ (SI), SI
+	// Discard the upper bytes which were out of bounds and add 0s (to be
+	// removed below).
+	SHLQ CX, SI
+	JMP si_finish
+si_high:
+	// SI = the 8 bytes ending at a[R8] (may read before the start of the slice)
+	MOVQ -8(SI)(R8*1), SI
+si_finish:
+	// SI = SI >> CX
+	// Discard the lower bytes which were added by SHLQ in one case, or that
+	// were out of bounds in the si_high case.
+	// In both cases, SI now holds the R8 valid bytes of a (zero-extended).
+	SHRQ CX, SI
+
+	// if uintptr(unsafe.Pointer(&b[0])) & 0xFF > 0xF8 {
+	//   goto di_high
+	// }
+	CMPB DI, $0xf8
+	JA di_high
+	// DI = b[:8]
+	MOVQ (DI), DI
+	// Discard the upper bytes which were out of bounds and add 0s (to be
+	// removed below).
+	SHLQ CX, DI
+	JMP di_finish
+di_high:
+	// DI = the 8 bytes ending at b[R8] (may read before the start of the slice)
+	MOVQ -8(DI)(R8*1), DI
+di_finish:
+	// DI = DI >> CX
+	// Discard the lower bytes which were added by SHLQ in one case, or that
+	// were out of bounds in the di_high case.
+	// In both cases, DI now holds the R8 valid bytes of b (zero-extended).
+	SHRQ CX, DI
+
+	// DI ^= SI
+	XORQ SI, DI
+	// if DI == 0 {
+	//   goto allsame
+	// }
+	JEQ allsame
+
+	// Bit Scan Forward (return the index of the least significant set bit)
+	// DI = bits.TrailingZeros64(DI)
+	BSFQ DI, DI
+	// DI /= 8
+	SHRQ $3, DI
+	// DI is now the 0-based index of the first byte that differs.
+	// R8 -= DI
+	SUBQ DI, R8
+
+	// Return DX (original min(alen, blen)) - R8 (bytes left to compare)
+	SUBQ R8, DX
+allsame:
+	MOVQ DX, ret+48(FP)
+	RET
+
+// big_loop is used when we have at least 64 bytes to compare. It is similar to
+// loop, except that we do 4 iterations at a time.
+big_loop:
+	MOVOU (SI), X0
+	MOVOU (DI), X1
+	PCMPEQB X0, X1
+	PMOVMSKB X1, AX
+	XORQ $0xffff, AX
+	JNE diff16
+
+	MOVOU 16(SI), X0
+	MOVOU 16(DI), X1
+	PCMPEQB X0, X1
+	PMOVMSKB X1, AX
+	XORQ $0xffff, AX
+	JNE diff32
+
+	MOVOU 32(SI), X0
+	MOVOU 32(DI), X1
+	PCMPEQB X0, X1
+	PMOVMSKB X1, AX
+	XORQ $0xffff, AX
+	JNE diff48
+
+	MOVOU 48(SI), X0
+	MOVOU 48(DI), X1
+	PCMPEQB X0, X1
+	PMOVMSKB X1, AX
+	XORQ $0xffff, AX
+	JNE diff64
+
+	// a = a[64:]
+	ADDQ $64, SI
+	// b = b[64:]
+	ADDQ $64, DI
+	// R8 -= 64
+	SUBQ $64, R8
+	CMPQ R8, $64
+	// if R8 <= 64 {
+	//   goto loop
+	// }
+	JBE loop
+	JMP big_loop
diff --git a/crbytes/common_prefix_arm64.s b/crbytes/common_prefix_arm64.s
new file mode 100644
index 0000000..cf20e22
--- /dev/null
+++ b/crbytes/common_prefix_arm64.s
@@ -0,0 +1,244 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in licenses/BSD-golang.txt.
+
+// This code is based on compare_arm64.s from Go 1.12.5.
+
+TEXT ·CommonPrefix(SB),$0-56
+	// R0 = uintptr(unsafe.Pointer(&a[0]))
+	MOVD a_base+0(FP), R0
+	// R1 = len(a)
+	MOVD a_len+8(FP), R1
+	// R2 = uintptr(unsafe.Pointer(&b[0]))
+	MOVD b_base+24(FP), R2
+	// R3 = len(b)
+	MOVD b_len+32(FP), R3
+
+	CMP R1, R3
+	// R6 = min(alen, blen)
+	CSEL LT, R3, R1, R6
+	// Throughout this function, R7 remembers the original min(alen, blen) and
+	// R6 is the number of bytes we still need to compare (with bytes 0 to R7-R6
+	// known to match).
+	MOVD R6, R7
+
+	// if R6 == 0 {
+	//   goto samebytes
+	// }
+	CBZ R6, samebytes
+	// if R6 < 16 {
+	//   goto small
+	// }
+	CMP $16, R6
+	BLT small
+
+// chunk16_loop compares 16 bytes at a time.
+// Invariant: R6 >= 16
chunk16_loop:
+	// R4, R8, a = a[:8], a[8:16], a[16:]
+	LDP.P 16(R0), (R4, R8)
+	// R5, R9, b = b[:8], b[8:16], b[16:]
+	LDP.P 16(R2), (R5, R9)
+	// if R4 != R5 {
+	//   goto cmp
+	// }
+	CMP R4, R5
+	BNE cmp
+	// if R8 != R9 {
+	//   goto cmpnext
+	// }
+	CMP R8, R9
+	BNE cmpnext
+	// R6 -= 16
+	SUB $16, R6
+	// if R6 >= 16 {
+	//   goto chunk16_loop
+	// }
+	CMP $16, R6
+	BGE chunk16_loop
+	// if R6 == 0 {
+	//   goto samebytes
+	// }
+	CBZ R6, samebytes
+	// if R6 <= 8 {
+	//   goto tail
+	// }
+	CMP $8, R6
+	BLE tail
+	// We have more than 8 bytes remaining; compare the first 8 bytes.
+	// R4, a = a[:8], a[8:]
+	// R5, b = b[:8], b[8:]
+	MOVD.P 8(R0), R4
+	MOVD.P 8(R2), R5
+	// if R4 != R5 {
+	//   goto cmp
+	// }
+	CMP R4, R5
+	BNE cmp
+	// R6 -= 8
+	SUB $8, R6
+
+// Invariants:
+// - the original slices have at least 8 bytes (R7 >= 8)
+// - there are at most 8 bytes left to compare (R6 <= 8)
+tail:
+	// R6 -= 8
+	SUB $8, R6
+	// R4 = a[R6:R6+8]
+	MOVD (R0)(R6), R4
+	// R5 = b[R6:R6+8]
+	MOVD (R2)(R6), R5
+	// if R4 == R5 {
+	//   goto samebytes
+	// }
+	CMP R4, R5
+	BEQ samebytes
+	// R6 = 8
+	MOVD $8, R6
+
+// Invariant: R4 and R5 contain corresponding chunks of a and b (zero-extended) and R4 != R5.
+cmp:
+	// R4 = bits.ReverseBytes64(R4)
+	REV R4, R4
+	// R5 = bits.ReverseBytes64(R5)
+	REV R5, R5
+// Invariant: R4 and R5 contain corresponding chunks in reverse byte order and R4 != R5.
+cmprev:
+	// R5 ^= R4
+	EOR R4, R5, R5
+	// R5 = bits.LeadingZeros64(R5)
+	// This is the number of leading bits that match.
+	CLZ R5, R5
+	// R5 /= 8
+	// This is the number of leading bytes that match.
+	LSR $3, R5, R5
+	// R6 -= R5
+	SUBS R5, R6, R6
+	// if R6 < 0 {
+	//   goto samebytes
+	// }
+	BLT samebytes
+
+ret:
+	// return R7 - R6
+	SUB R6, R7
+	MOVD R7, ret+48(FP)
+	RET
+
+// Invariant: we have less than 16 bytes to compare (R6 = R7, R6 < 16).
+small:
+	// Test Bit and Branch if Zero:
+	// if R6 & 8 == 0 {
+	//   goto lt_8
+	// }
+	TBZ $3, R6, lt_8
+	// R4 = a[:8]
+	MOVD (R0), R4
+	// R5 = b[:8]
+	MOVD (R2), R5
+	// if R4 != R5 {
+	//   goto cmp
+	// }
+	CMP R4, R5
+	BNE cmp
+	// R6 -= 8
+	SUBS $8, R6, R6
+	// if R6 == 0 {
+	//   goto samebytes
+	// }
+	BEQ samebytes
+	// a = a[8:]
+	ADD $8, R0
+	// b = b[8:]
+	ADD $8, R2
+	// goto tail
+	B tail
+
+// Invariant: we have less than 8 bytes to compare (R6 = R7, R6 < 8).
+lt_8:
+	// Test Bit and Branch if Zero:
+	// if R6 & 4 == 0 {
+	//   goto lt_4
+	// }
+	TBZ $2, R6, lt_4
+	// R4 = a[:4]
+	MOVWU (R0), R4
+	// R5 = b[:4]
+	MOVWU (R2), R5
+	// if R4 != R5 {
+	//   goto cmp
+	// }
+	CMPW R4, R5
+	BNE cmp
+	// R6 -= 4
+	SUBS $4, R6
+	// if R6 == 0 {
+	//   goto samebytes
+	// }
+	BEQ samebytes
+	// a = a[4:]
+	ADD $4, R0
+	// b = b[4:]
+	ADD $4, R2
+
+// Invariant: we have less than 4 bytes left to compare (R6 < 4).
+lt_4:
+	// Test Bit and Branch if Zero:
+	// if R6 & 2 == 0 {
+	//   goto lt_2
+	// }
+	TBZ $1, R6, lt_2
+	// R4 = a[:2]
+	MOVHU (R0), R4
+	// R5 = b[:2]
+	MOVHU (R2), R5
+	CMPW R4, R5
+	// if R4 != R5 {
+	//   goto cmp
+	// }
+	BNE cmp
+	// a = a[2:]
+	ADD $2, R0
+	// b = b[2:]
+	ADD $2, R2
+	// R6 -= 2
+	SUB $2, R6
+
+// Invariant: we have less than 2 bytes left to compare (R6 < 2).
+lt_2:
+	// if R6 == 0 {
+	//   goto samebytes
+	// }
+	TBZ $0, R6, samebytes
+
+// Invariant: we have 1 byte left to compare (R6 = 1).
+one:
+	// R4 = a[:1]
+	MOVBU (R0), R4
+	// R5 = b[:1]
+	MOVBU (R2), R5
+	// if R4 != R5 {
+	//   goto ret
+	// }
+	CMPW R4, R5
+	BNE ret
+
+// Invariant: all R7 bytes matched.
+samebytes:
+	// Return R7
+	MOVD R7, ret+48(FP)
+	RET
+
+// Invariants:
+// - the next 8 bytes match (a[:8] == b[:8])
+// - registers R8 and R9 contain the following 8 bytes (R8 = a[8:16], R9 = b[8:16])
+// - R8 != R9
+cmpnext:
+	// R6 -= 8
+	SUB $8, R6
+	// R4 = bits.ReverseBytes64(R8)
+	REV R8, R4
+	// R5 = bits.ReverseBytes64(R9)
+	REV R9, R5
+	// goto cmprev
+	B cmprev
diff --git a/crbytes/common_prefix_bench_test.go b/crbytes/common_prefix_bench_test.go
new file mode 100644
index 0000000..40bb83a
--- /dev/null
+++ b/crbytes/common_prefix_bench_test.go
@@ -0,0 +1,105 @@
+// Copyright 2024 The Cockroach Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+ +package crbytes + +import ( + "bytes" + "math/rand" + "slices" + "testing" +) + +// Sample benchmark results: +// +// linux/amd64, Intel(R) Xeon(R) CPU @ 2.80GHz: +// +// CommonPrefix/small/crbytes-24 8.54ns ± 1% +// CommonPrefix/small/generic-24 11.0ns ± 1% +// CommonPrefix/small/naive-24 13.6ns ± 1% +// CommonPrefix/medium/crbytes-24 13.8ns ± 2% +// CommonPrefix/medium/generic-24 26.3ns ± 2% +// CommonPrefix/medium/naive-24 31.7ns ± 2% +// CommonPrefix/large/crbytes-24 153ns ± 2% +// CommonPrefix/large/generic-24 362ns ± 2% +// CommonPrefix/large/naive-24 755ns ± 1% +// +// darwin/arm64, Apple M1: +// +// CommonPrefix/small/crbytes-10 5.48ns ± 1% +// CommonPrefix/small/generic-10 7.02ns ± 7% +// CommonPrefix/small/naive-10 9.58ns ± 2% +// CommonPrefix/medium/crbytes-10 7.42ns ± 8% +// CommonPrefix/medium/generic-10 15.6ns ± 5% +// CommonPrefix/medium/naive-10 23.5ns ± 7% +// CommonPrefix/large/crbytes-10 125ns ± 4% +// CommonPrefix/large/generic-10 249ns ±11% +// CommonPrefix/large/naive-10 698ns ± 0% +func BenchmarkCommonPrefix(b *testing.B) { + small := lexicographicSet(4, 16) + medium := lexicographicSet(10, 100) + large := lexicographicSet(1000, 10000) + b.Run("small", func(b *testing.B) { + runBenchComparison(b, small) + }) + b.Run("medium", func(b *testing.B) { + runBenchComparison(b, medium) + }) + b.Run("large", func(b *testing.B) { + runBenchComparison(b, large) + }) +} + +func runBenchComparison(b *testing.B, input [][]byte) { + b.Run("crbytes", func(b *testing.B) { + runBench(b, input, CommonPrefix) + }) + b.Run("generic", func(b *testing.B) { + runBench(b, input, commonPrefixGeneric) + }) + b.Run("naive", func(b *testing.B) { + runBench(b, input, commonPrefixNaive) + }) +} + +func runBench(b *testing.B, input [][]byte, impl func(a, b []byte) int) { + n := len(input) + j := 0 + var sum int + for i := 0; i < b.N; i++ { + next := j + 1 + if next >= n { + next = 0 + } + sum += impl(input[j], input[next]) + j = next + } + b.Logf("average result: %d\n", sum/b.N) +} + +// lexicographicSet returns a lexicographically ordered list of byte slices +// which all have a common prefix of length minLength, with random bytes (with +// alphabet size 2) following up to maxLength. +func lexicographicSet(minLength, maxLength int) [][]byte { + const n = 10_000 + const alphabet = 2 + prefix := genBytes(minLength, alphabet) + + result := make([][]byte, n) + for i := range result { + result[i] = slices.Concat(prefix, genBytes(rand.Intn(maxLength-minLength+1), alphabet)) + } + slices.SortFunc(result, bytes.Compare) + return result +} diff --git a/crbytes/common_prefix_generic.go b/crbytes/common_prefix_generic.go new file mode 100644 index 0000000..cf107a5 --- /dev/null +++ b/crbytes/common_prefix_generic.go @@ -0,0 +1,22 @@ +// Copyright 2024 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +//go:build !amd64 && !arm64 + +package crbytes + +// CommonPrefix returns the longest prefix shared by the two slices. 
+func CommonPrefix(a, b []byte) int { + return commonPrefixGeneric(a, b) +} diff --git a/crbytes/common_prefix_native.go b/crbytes/common_prefix_native.go new file mode 100644 index 0000000..2d31bac --- /dev/null +++ b/crbytes/common_prefix_native.go @@ -0,0 +1,22 @@ +// Copyright 2024 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +//go:build amd64 || arm64 + +package crbytes + +// CommonPrefix returns the longest prefix shared by the two slices. +// +//go:noescape +func CommonPrefix(a, b []byte) int diff --git a/crbytes/common_prefix_test.go b/crbytes/common_prefix_test.go new file mode 100644 index 0000000..71cd96c --- /dev/null +++ b/crbytes/common_prefix_test.go @@ -0,0 +1,86 @@ +// Copyright 2024 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package crbytes + +import ( + "bytes" + "math/rand" + "testing" +) + +func TestCommonPrefixAllLengths(t *testing.T) { + // Construct cases with each length up to a certain size. + for l := 0; l <= 256; l++ { + for k := 0; k <= l; k++ { + a := bytes.Repeat([]byte("x"), l) + b := bytes.Repeat([]byte("x"), l) + if k < l { + b[k] = '0' + } + if res := CommonPrefix(a, b); res != k { + t.Errorf("length=%d expected=%d result=%d\n", l, k, res) + } + // Always test the generic implementation too. + if res := commonPrefixGeneric(a, b); res != k { + t.Errorf("length=%d expected=%d result=%d\n", l, k, res) + } + } + } +} + +func TestCommonPrefixRand(t *testing.T) { + for _, tc := range []struct { + maxLen int + alphabet int + }{ + {maxLen: 4, alphabet: 2}, + {maxLen: 100, alphabet: 2}, + {maxLen: 200, alphabet: 2}, + {maxLen: 10, alphabet: 4}, + {maxLen: 500, alphabet: 4}, + {maxLen: 10, alphabet: 26}, + {maxLen: 500, alphabet: 26}, + } { + for n := 0; n < 1000; n++ { + a := genBytes(rand.Intn(tc.maxLen+1), tc.alphabet) + b := genBytes(rand.Intn(tc.maxLen+1), tc.alphabet) + expected := commonPrefixNaive(a, b) + if res := CommonPrefix(a, b); res != expected { + t.Errorf("%q %q expected=%d result=%d\n", a, b, expected, res) + } + // Always test the generic implementation too. 
+ if res := commonPrefixGeneric(a, b); res != expected { + t.Errorf("%q %q expected=%d result=%d\n", a, b, expected, res) + } + } + } +} + +func commonPrefixNaive(a, b []byte) int { + n := min(len(a), len(b)) + i := 0 + for i < n && a[i] == b[i] { + i++ + } + return i +} + +func genBytes(length int, alphabet int) []byte { + a := make([]byte, length) + for i := range a { + a[i] = 'a' + byte(rand.Intn(alphabet)) + } + return a +}
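For a sense of how a caller might use this primitive, here is a small illustrative sketch (not part of the diff above) that prefix-compresses a sorted list of keys, the adjacent-key access pattern the benchmarks model. The import path github.com/cockroachdb/crlib/crbytes is an assumption based on the directory layout in this diff; adjust it to wherever the package actually lives.

package main

import (
	"fmt"

	"github.com/cockroachdb/crlib/crbytes" // assumed import path
)

// compressSorted encodes each key as the length of the prefix it shares with
// the previous key plus the remaining suffix. With sorted input, adjacent
// keys tend to share long prefixes, so the suffixes stay short.
func compressSorted(keys [][]byte) (shared []int, suffixes [][]byte) {
	var prev []byte
	for _, k := range keys {
		// CommonPrefix handles a nil/empty previous key by returning 0.
		n := crbytes.CommonPrefix(prev, k)
		shared = append(shared, n)
		suffixes = append(suffixes, k[n:])
		prev = k
	}
	return shared, suffixes
}

func main() {
	keys := [][]byte{
		[]byte("apple/001"),
		[]byte("apple/002"),
		[]byte("apricot/001"),
	}
	shared, suffixes := compressSorted(keys)
	for i := range keys {
		fmt.Printf("shared=%d suffix=%q\n", shared[i], suffixes[i])
	}
}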