diff --git a/app/tlsh.go b/app/tlsh.go index ef59754..f97c533 100644 --- a/app/tlsh.go +++ b/app/tlsh.go @@ -3,21 +3,29 @@ package main import ( "flag" "fmt" + "os" "github.com/glaslos/tlsh" ) func main() { + var file = flag.String("f", "", "path to the file to be hashed") + var raw = flag.Bool("r", false, "set to get only the hash") flag.Parse() - if len(flag.Args()) < 1 { - fmt.Println("Please provide a file path: ./tlsh /tmp/file") + if *file == "" { + fmt.Fprintf(os.Stderr, "Usage of %s [-f ]\n\n", os.Args[0]) + flag.PrintDefaults() + fmt.Println() return } - fileName := flag.Args()[0] - hash, err := tlsh.Hash(fileName) + hash, err := tlsh.Hash(*file) if err != nil { fmt.Println(err) return } - fmt.Printf("%s %s\n", hash, fileName) + if *raw { + fmt.Println(hash) + } else { + fmt.Printf("%s %s\n", hash, *file) + } } diff --git a/tlsh.go b/tlsh.go index a58ab7b..23bfc5d 100644 --- a/tlsh.go +++ b/tlsh.go @@ -1,9 +1,11 @@ package tlsh import ( + "bufio" "fmt" - "io/ioutil" + "io" "math" + "os" ) const ( @@ -215,40 +217,72 @@ func makeStringTLSH(biHash []byte) (hash string) { return } -func fillBuckets(data []byte) (buckets [numBuckets]uint, checksum byte) { - chunk := [windowLength]byte{} - salt := [6]byte{2, 3, 5, 7, 11, 13} - sw := 0 - - for sw <= len(data)-windowLength { +func reverse(s [5]byte) [5]byte { + for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 { + s[i], s[j] = s[j], s[i] + } + return s +} - for j, x := sw+windowLength-1, 0; j >= sw; j, x = j-1, x+1 { - chunk[x] = data[j] - } +func fillBuckets(r fuzzyReader) ([numBuckets]uint, byte, int, error) { + buckets := [numBuckets]uint{} + chunkSlice := make([]byte, windowLength) + chunk := [5]byte{} + salt := [6]byte{2, 3, 5, 7, 11, 13} + fileSize := 0 + checksum := byte(0) - sw++ + n, err := r.Read(chunkSlice) + if err != nil { + return [numBuckets]uint{}, 0, 0, err + } + copy(chunk[:], chunkSlice[0:5]) + chunk = reverse(chunk) + fileSize += n + for { triplets := getTriplets(chunk) - checksumTriplet := [3]byte{chunk[0], chunk[1], checksum} checksum = pearsonHash(0, checksumTriplet) - for i, triplet := range triplets { buckets[pearsonHash(salt[i], triplet)]++ } + copy(chunk[1:], chunk[0:4]) + chunk[0], err = r.ReadByte() + if err != nil { + if err != io.EOF { + return [numBuckets]uint{}, 0, 0, err + } + break + } + fileSize++ } - return buckets, checksum + return buckets, checksum, fileSize, nil } -//Hash calculates the TLSH for the input file -func Hash(filename string) (hash string, err error) { - data, err := ioutil.ReadFile(filename) +type fuzzyReader interface { + Read([]byte) (int, error) + ReadByte() (byte, error) +} + +//HashReader calculates the TLSH for the input reader +func HashReader(r fuzzyReader) (hash string, err error) { + buckets, checksum, fileSize, err := fillBuckets(r) if err != nil { return } - - buckets, checksum := fillBuckets(data) q1, q2, q3 := quartilePoints(buckets) - hash = makeStringTLSH(hashTLSH(len(data), buckets, checksum, q1, q2, q3)) + hash = makeStringTLSH(hashTLSH(fileSize, buckets, checksum, q1, q2, q3)) return hash, nil } + +//Hash calculates the TLSH for the input file +func Hash(filename string) (hash string, err error) { + f, err := os.Open(filename) + defer f.Close() + if err != nil { + return + } + r := bufio.NewReader(f) + return HashReader(r) +} diff --git a/tlsh_test.go b/tlsh_test.go index 7143d62..2267358 100644 --- a/tlsh_test.go +++ b/tlsh_test.go @@ -1,8 +1,8 @@ package tlsh import ( - "fmt" - "io/ioutil" + "bufio" + "os" "testing" ) @@ -25,12 +25,11 @@ var ( func TestReal(t *testing.T) { for _, tc := range testCases { - if bar, err := Hash(tc.filename); bar != tc.hash { - fmt.Printf("%s\n", bar) + if hash, err := Hash(tc.filename); hash != tc.hash { if err != nil { t.Error(err) } - t.Errorf("\nfilename: %s\n%s\n%s - doesn't match real hash\n", tc.filename, tc.hash, bar) + t.Errorf("\nfilename: %s\n%s\n%s - doesn't match real hash\n", tc.filename, tc.hash, hash) } } } @@ -44,21 +43,30 @@ func BenchmarkPearson(b *testing.B) { } func BenchmarkFillBuckets(b *testing.B) { - data, err := ioutil.ReadFile("tests/test_file_1") + f, err := os.Open("tests/test_file_1") + defer f.Close() if err != nil { b.Error(err) } + f.Seek(0, 0) for n := 0; n < b.N; n++ { - fillBuckets(data) + r := bufio.NewReader(f) + fillBuckets(r) + f.Seek(0, 0) } } func BenchmarkQuartilePoints(b *testing.B) { - data, err := ioutil.ReadFile("tests/test_file_1") + f, err := os.Open("tests/test_file_1") + defer f.Close() + if err != nil { + b.Error(err) + } + r := bufio.NewReader(f) + buckets, _, _, err := fillBuckets(r) if err != nil { b.Error(err) } - buckets, _ := fillBuckets(data) for n := 0; n < b.N; n++ { quartilePoints(buckets) }