Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Slices without reflect #12

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 12 additions & 15 deletions hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,9 @@ package gohashtree

import (
"fmt"
"reflect"
"unsafe"
)

// _hash is the assembly-implemented hashing routine (bodies live in
// hash_amd64.s and hash_arm64.s). digests points at the first output byte,
// p supplies the input data, and count is the number of 64-byte blocks to
// hash: callers pass len(chunks)/2 for [][32]byte input and len(chunks)/64
// for []byte input.
// NOTE(review): this is a diff view — the first declaration below looks like
// the pre-change signature (p as a slice) and the second its replacement
// (p as a pointer to the first input byte); confirm against the merged file.
func _hash(digests *byte, p [][32]byte, count uint32)
func _hash(digests *byte, p *byte, count uint32)

func Hash(digests [][32]byte, chunks [][32]byte) error {
if len(chunks) == 0 {
Expand All @@ -43,15 +41,15 @@ func Hash(digests [][32]byte, chunks [][32]byte) error {
return fmt.Errorf("not enough digest length, need at least %v, got %v", len(chunks)/2, len(digests))
}
if supportedCPU {
_hash(&digests[0][0], chunks, uint32(len(chunks)/2))
_hash(&digests[0][0], &chunks[0][0], uint32(len(chunks)/2))
} else {
sha256_1_generic(digests, chunks)
}
return nil
}

// HashChunks passes digests and chunks straight to the assembly routine
// _hash, asking it to process len(chunks)/2 blocks. Unlike Hash it performs
// no length checks and does not consult supportedCPU, and it indexes element
// 0 of its arguments, so the slices must be non-empty.
// NOTE(review): this is a diff view — the first call below looks like the
// pre-change form (chunks passed as a slice) and the second its replacement
// (pointer to the first chunk byte); confirm against the merged file.
func HashChunks(digests [][32]byte, chunks [][32]byte) {
_hash(&digests[0][0], chunks, uint32(len(chunks)/2))
_hash(&digests[0][0], &chunks[0][0], uint32(len(chunks)/2))
}

func HashByteSlice(digests []byte, chunks []byte) error {
Expand All @@ -69,18 +67,17 @@ func HashByteSlice(digests []byte, chunks []byte) error {
}
// We use an unsafe pointer to cast []byte to [][32]byte. The length and
// capacity of the slice need to be divided accordingly by 32.
header := *(*reflect.SliceHeader)(unsafe.Pointer(&chunks))
header.Len <<= 5
header.Cap <<= 5
chunkedChunks := *(*[][32]byte)(unsafe.Pointer(&header))

if supportedCPU {
_hash(&digests[0], chunkedChunks, uint32(len(chunks)/64))
_hash(&digests[0], &chunks[0], uint32(len(chunks)/64))
} else {
headerDigest := *(*reflect.SliceHeader)(unsafe.Pointer(&digests))
headerDigest.Len <<= 5
headerDigest.Cap <<= 5
chunkedDigest := *(*[][32]byte)(unsafe.Pointer(&headerDigest))
chunkedChunks := make([][32]byte, len(chunks)/32)
for i := range chunkedChunks {
copy(chunkedChunks[i][:], chunks[32*i:32*i+32])
}
chunkedDigest := make([][32]byte, len(digests)/32)
for i := range chunkedDigest {
copy(chunkedDigest[i][:], digests[32*i:32*i+32])
}
sha256_1_generic(chunkedDigest, chunkedChunks)
}
return nil
Expand Down
20 changes: 10 additions & 10 deletions hash_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -783,8 +783,8 @@ TEXT ·_hash(SB), 0, $928-36
JE avx2

MOVQ digests+0(FP), OUTPUT_PTR // digests *[][32]byte
MOVQ p_base+8(FP), DATA_PTR // p [][32]byte
MOVL count+32(FP), NUM_BLKS // NUM_BLKS uint32
MOVQ p+8(FP), DATA_PTR // p *[][32]byte or *[]byte
MOVL count+16(FP), NUM_BLKS // NUM_BLKS uint32

avx1:
CMPL NUM_BLKS, $4
Expand Down Expand Up @@ -1314,9 +1314,9 @@ sha256_1_avx_epilog:

// 8 blocks at a time with AVX2
avx2:
MOVL count+32(FP), NUM_BLKS // NUMBLKS uint32
MOVQ digests+0(FP), OUTPUT_PTR // digests *[][32]byte
MOVQ p_base+8(FP), DATA_PTR // p [][32]byte
MOVL count+16(FP), NUM_BLKS // NUMBLKS uint32
MOVQ digests+0(FP), OUTPUT_PTR // digests *[][32]byte or *[]byte
MOVQ p+8(FP), DATA_PTR // p *[][32]byte or p *[]byte

sha256_8_avx2_loop:
CMPL NUM_BLKS, $8
Expand Down Expand Up @@ -1591,8 +1591,8 @@ sha256_8_avx2_loop:
// AVX 512 section
avx512:
MOVQ digests+0(FP), OUTPUT_PTR
MOVQ p_base+8(FP), DATA_PTR
MOVL count+32(FP), NUM_BLKS
MOVQ p+8(FP), DATA_PTR
MOVL count+16(FP), NUM_BLKS

MOVQ $_DIGEST_16<>(SB), DIGESTAVX512
MOVQ $_PADDING_16<>(SB), PADDINGAVX512
Expand Down Expand Up @@ -2046,9 +2046,9 @@ avx512_loop:

// SHA-ni section
shani:
MOVQ digests+0(FP), OUTPUT_PTR // digests *[][32]byte
MOVQ p_base+8(FP), DATA_PTR // p [][32]byte
MOVL count+32(FP), NUM_BLKS // NUM_BLKS uint32
MOVQ digests+0(FP), OUTPUT_PTR // digests *[][32]byte or *[]byte
MOVQ p+8(FP), DATA_PTR // p *[][32]byte or *[]byte
MOVL count+16(FP), NUM_BLKS // NUM_BLKS uint32

// Golang assembly does not guarantee stack aligned at 16 bytes
MOVQ SP, SAVE_SP
Expand Down
4 changes: 2 additions & 2 deletions hash_arm64.s
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,8 @@ Copied parts are

TEXT ·_hash(SB), 0, $1024-36
MOVD digests+0(FP), OUTPUT_PTR
MOVD p_base+8(FP), DATA_PTR
MOVWU count+32(FP), NUM_BLKS
MOVD p+8(FP), DATA_PTR
MOVWU count+16(FP), NUM_BLKS

MOVBU ·hasShani(SB), check_shani
CBNZ check_shani, shani
Expand Down
48 changes: 48 additions & 0 deletions hash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,15 @@ func BenchmarkHash_1(b *testing.B) {
}
}

// BenchmarkHash_slice_1 measures gohashtree.HashByteSlice on 64 zeroed input
// bytes (one 64-byte block) written into a 32-byte digest buffer.
func BenchmarkHash_slice_1(b *testing.B) {
	chunks := make([]byte, 64)
	digests := make([]byte, 32)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Do not time an error path: stop the benchmark if the call fails.
		if err := gohashtree.HashByteSlice(digests, chunks); err != nil {
			b.Fatal(err)
		}
	}
}

func BenchmarkHash_4_minio(b *testing.B) {
chunks := [64 * 4]byte{'A'}
digests := make([][32]byte, 4)
Expand All @@ -336,6 +345,15 @@ func BenchmarkHash_4(b *testing.B) {
}
}

// BenchmarkHash_slice_4 measures gohashtree.HashByteSlice on 8*32 zeroed
// input bytes (four 64-byte blocks) written into a 4*32-byte digest buffer.
func BenchmarkHash_slice_4(b *testing.B) {
	chunks := make([]byte, 8*32)
	digests := make([]byte, 4*32)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Do not time an error path: stop the benchmark if the call fails.
		if err := gohashtree.HashByteSlice(digests, chunks); err != nil {
			b.Fatal(err)
		}
	}
}

func BenchmarkHash_8_minio(b *testing.B) {
chunks := [64 * 8]byte{'A'}
digests := make([][32]byte, 8)
Expand All @@ -356,6 +374,15 @@ func BenchmarkHash_8(b *testing.B) {
}
}

// BenchmarkHash_slice_8 measures gohashtree.HashByteSlice on 16*32 zeroed
// input bytes (eight 64-byte blocks) written into an 8*32-byte digest buffer.
func BenchmarkHash_slice_8(b *testing.B) {
	chunks := make([]byte, 16*32)
	digests := make([]byte, 8*32)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Do not time an error path: stop the benchmark if the call fails.
		if err := gohashtree.HashByteSlice(digests, chunks); err != nil {
			b.Fatal(err)
		}
	}
}

func BenchmarkHash_16_minio(b *testing.B) {
chunks := [64 * 16]byte{'A'}
digests := make([][32]byte, 16)
Expand All @@ -376,6 +403,15 @@ func BenchmarkHash_16(b *testing.B) {
}
}

// BenchmarkHash_slice_16 measures gohashtree.HashByteSlice on 32*32 zeroed
// input bytes (sixteen 64-byte blocks) written into a 16*32-byte digest
// buffer.
func BenchmarkHash_slice_16(b *testing.B) {
	chunks := make([]byte, 32*32)
	digests := make([]byte, 16*32)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Do not time an error path: stop the benchmark if the call fails.
		if err := gohashtree.HashByteSlice(digests, chunks); err != nil {
			b.Fatal(err)
		}
	}
}

func BenchmarkHashLargeList_minio(b *testing.B) {
balances := make([][32]byte, 400000)
for i := 0; i < len(balances); i++ {
Expand All @@ -402,3 +438,15 @@ func BenchmarkHashList(b *testing.B) {
gohashtree.Hash(digests, balances)
}
}

// BenchmarkHashList_slice measures gohashtree.HashByteSlice on a large flat
// input: 400000 32-byte chunks, each starting with the byte 'A' and otherwise
// zero, written into a 200000*32-byte digest buffer.
func BenchmarkHashList_slice(b *testing.B) {
	balances := make([]byte, 400000*32)
	// Mark the first byte of every 32-byte chunk so the input is not all zero.
	for i := 0; i < len(balances); i += 32 {
		balances[i] = byte('A')
	}
	digests := make([]byte, 200000*32)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Do not time an error path: stop the benchmark if the call fails.
		if err := gohashtree.HashByteSlice(digests, balances); err != nil {
			b.Fatal(err)
		}
	}
}