From a2f6cd9265d6d8655f8381e4d0b444c67a4e1240 Mon Sep 17 00:00:00 2001 From: Lukas Rist Date: Sun, 23 Oct 2022 14:47:52 +0200 Subject: [PATCH] deduplicate processing --- hash.go | 39 +------------------------- tlsh.go | 86 ++++++++++++++++++++++++++++++++++----------------------- 2 files changed, 52 insertions(+), 73 deletions(-) diff --git a/hash.go b/hash.go index 0e52b91..826a783 100644 --- a/hash.go +++ b/hash.go @@ -4,17 +4,6 @@ import ( "hash" ) -var salt = [6]byte{2, 3, 5, 7, 11, 13} - -type chunkState struct { - buckets [numBuckets]uint - chunk [windowLength]byte - chunkSlice []byte - fileSize int - checksum byte - chunk3 *[3]byte -} - var _ hash.Hash = &TLSH{} func (t *TLSH) Reset() { @@ -54,33 +43,6 @@ func (t *TLSH) Sum(b []byte) []byte { return t.Binary() } -func (s *chunkState) process() { - s.chunk3[0] = s.chunk[0] - s.chunk3[1] = s.chunk[1] - s.chunk3[2] = s.checksum - s.checksum = pearsonHash(0, s.chunk3) - - s.chunk3[2] = s.chunk[2] - s.buckets[pearsonHash(salt[0], s.chunk3)]++ - - s.chunk3[2] = s.chunk[3] - s.buckets[pearsonHash(salt[1], s.chunk3)]++ - - s.chunk3[1] = s.chunk[2] - s.buckets[pearsonHash(salt[2], s.chunk3)]++ - - s.chunk3[2] = s.chunk[4] - s.buckets[pearsonHash(salt[3], s.chunk3)]++ - - s.chunk3[1] = s.chunk[1] - s.buckets[pearsonHash(salt[4], s.chunk3)]++ - - s.chunk3[1] = s.chunk[3] - s.buckets[pearsonHash(salt[5], s.chunk3)]++ - - copy(s.chunk[1:], s.chunk[0:4]) -} - func (t *TLSH) Write(p []byte) (int, error) { t.state.fileSize += len(p) if len(t.state.chunkSlice) < windowLength { @@ -97,6 +59,7 @@ func (t *TLSH) Write(p []byte) (int, error) { t.state.process() } } + for _, b := range p { t.state.chunk[0] = b t.state.process() diff --git a/tlsh.go b/tlsh.go index 5c28765..0df2519 100644 --- a/tlsh.go +++ b/tlsh.go @@ -260,58 +260,74 @@ func reverse(s [5]byte) [5]byte { return s } -func fillBuckets(r FuzzyReader) ([numBuckets]uint, byte, int, error) { - buckets := [numBuckets]uint{} - chunkSlice := make([]byte, windowLength) - 
chunk := [windowLength]byte{} - fileSize := 0 - checksum := byte(0) +type chunkState struct { + buckets [numBuckets]uint + chunk [windowLength]byte + chunkSlice []byte + fileSize int + checksum byte + chunk3 *[3]byte +} - n, err := r.Read(chunkSlice) - if err != nil { - return [numBuckets]uint{}, 0, 0, err - } - copy(chunk[:], chunkSlice[0:5]) - chunk = reverse(chunk) - fileSize += n +func (s *chunkState) process() { + s.chunk3[0] = s.chunk[0] + s.chunk3[1] = s.chunk[1] + s.chunk3[2] = s.checksum + s.checksum = pearsonHash(0, s.chunk3) - chunk3 := &[3]byte{} + s.chunk3[2] = s.chunk[2] + s.buckets[pearsonHash(salt[0], s.chunk3)]++ - for { - chunk3[0] = chunk[0] - chunk3[1] = chunk[1] - chunk3[2] = checksum - checksum = pearsonHash(0, chunk3) + s.chunk3[2] = s.chunk[3] + s.buckets[pearsonHash(salt[1], s.chunk3)]++ + + s.chunk3[1] = s.chunk[2] + s.buckets[pearsonHash(salt[2], s.chunk3)]++ - chunk3[2] = chunk[2] - buckets[pearsonHash(salt[0], chunk3)]++ + s.chunk3[2] = s.chunk[4] + s.buckets[pearsonHash(salt[3], s.chunk3)]++ - chunk3[2] = chunk[3] - buckets[pearsonHash(salt[1], chunk3)]++ + s.chunk3[1] = s.chunk[1] + s.buckets[pearsonHash(salt[4], s.chunk3)]++ - chunk3[1] = chunk[2] - buckets[pearsonHash(salt[2], chunk3)]++ + s.chunk3[1] = s.chunk[3] + s.buckets[pearsonHash(salt[5], s.chunk3)]++ - chunk3[2] = chunk[4] - buckets[pearsonHash(salt[3], chunk3)]++ + copy(s.chunk[1:], s.chunk[0:4]) +} - chunk3[1] = chunk[1] - buckets[pearsonHash(salt[4], chunk3)]++ +var salt = [6]byte{2, 3, 5, 7, 11, 13} - chunk3[1] = chunk[3] - buckets[pearsonHash(salt[5], chunk3)]++ +func fillBuckets(r FuzzyReader) ([numBuckets]uint, byte, int, error) { + state := chunkState{} + state.buckets = [numBuckets]uint{} + state.chunkSlice = make([]byte, windowLength) + state.chunk = [windowLength]byte{} + state.fileSize = 0 + state.checksum = byte(0) + + n, err := r.Read(state.chunkSlice) + if err != nil { + return [numBuckets]uint{}, 0, 0, err + } + copy(state.chunk[:], state.chunkSlice[0:5]) + 
state.chunk = reverse(state.chunk) + state.fileSize += n + + state.chunk3 = &[3]byte{} - copy(chunk[1:], chunk[0:4]) - chunk[0], err = r.ReadByte() + for { + state.process() + state.chunk[0], err = r.ReadByte() if err != nil { if err != io.EOF { return [numBuckets]uint{}, 0, 0, err } break } - fileSize++ + state.fileSize++ } - return buckets, checksum, fileSize, nil + return state.buckets, state.checksum, state.fileSize, nil } // hashCalculate calculate TLSH