diff --git a/internal/fingerprint/fingerprint.go b/internal/fingerprint/fingerprint.go index 67f889bd..1ea72a6b 100644 --- a/internal/fingerprint/fingerprint.go +++ b/internal/fingerprint/fingerprint.go @@ -1,9 +1,11 @@ package fingerprint import ( + "archive/zip" "bufio" "crypto/md5" // #nosec "fmt" + "io" "log" "os" "path/filepath" @@ -27,7 +29,7 @@ var EXCLUDED_EXT = []string{ ".rst", ".scss", ".sha", ".sha1", ".sha2", ".sha256", ".sln", ".spec", ".sql", ".sub", ".svg", ".svn-base", ".tab", ".template", ".test", ".tex", ".tiff", ".toml", ".ttf", ".txt", ".utf-8", ".vim", ".wav", ".whl", ".woff", ".woff2", ".xht", - ".xhtml", ".xls", ".xlsx", ".xml", ".xpm", ".xsd", ".xul", ".yaml", ".yml", ".wfp", + ".xhtml", ".xls", ".xlsx", ".xpm", ".xsd", ".xul", ".yaml", ".yml", ".wfp", ".editorconfig", ".dotcover", ".pid", ".lcov", ".egg", ".manifest", ".cache", ".coverage", ".cover", ".gem", ".lst", ".pickle", ".pdb", ".gml", ".pot", ".plt", } @@ -109,6 +111,7 @@ func (f FileFingerprint) ToString() string { return fmt.Sprintf("file=%x,%d,%s", f.fingerprint, f.contentLength, path) } + func (f *Fingerprinter) FingerprintFiles(rootPath string, exclusions []string) (Fingerprints, error) { log.Println("Warning: Fingerprinting is beta and may not work as expected.") if len(rootPath) == 0 { @@ -128,20 +131,18 @@ func (f *Fingerprinter) FingerprintFiles(rootPath string, exclusions []string) ( return err } - if !shouldProcessFile(fileInfo, exclusions, path) { - return nil - } - - nbFiles++ - fingerprint, err := computeMD5(path) + fingerprintsZip, err := computeMD5ForFileAndZip(fileInfo, path, exclusions) if err != nil { return err } + if len(fingerprintsZip) != 0 { + fingerprints.Entries = append(fingerprints.Entries, fingerprintsZip...) 
- fingerprints.Append(fingerprint) + nbFiles += len(fingerprintsZip) - if nbFiles%100 == 0 { - f.spinnerManager.SetSpinnerMessage(spinner, spinnerMessage, fmt.Sprintf("%d", nbFiles)) + if nbFiles%100 == 0 { + f.spinnerManager.SetSpinnerMessage(spinner, spinnerMessage, fmt.Sprintf("%d", nbFiles)) + } } return nil @@ -160,6 +161,32 @@ func (f *Fingerprinter) FingerprintFiles(rootPath string, exclusions []string) ( return fingerprints, err } +func computeMD5ForFileAndZip(fileInfo os.FileInfo, path string, exclusions []string) ([]FileFingerprint, error) { + fingerprints := []FileFingerprint{} + + if !shouldProcessFile(fileInfo, exclusions, path) { + return fingerprints, nil + } + + // Scan the contents of compressed files + // such as .jar and .nupkg + if shouldUnzip(path) { + fingerprintsZip, err := inMemFingerprintingCompressedContent(path, exclusions) + if err != nil { + return nil, err + } + fingerprints = append(fingerprints, fingerprintsZip...) + } + fingerprint, err := computeMD5ForFile(path) + if err != nil { + return nil, err + } + + fingerprints = append(fingerprints, fingerprint) + + return fingerprints, nil +} + func isSymlink(filename string) (bool, error) { info, err := os.Lstat(filename) if err != nil { @@ -169,6 +196,8 @@ func isSymlink(filename string) (bool, error) { return info.Mode()&os.ModeSymlink != 0, nil } +var isSymlinkFunc = isSymlink + func shouldProcessFile(fileInfo os.FileInfo, exclusions []string, path string) bool { if fileInfo.IsDir() { return false @@ -182,15 +211,21 @@ func shouldProcessFile(fileInfo os.FileInfo, exclusions []string, path string) b return false } - isSymlink, err := isSymlink(path) + isSymlink, err := isSymlinkFunc(path) if err != nil { - return false + // Handle error with reading inmem files in windows + if strings.HasSuffix(err.Error(), "The system cannot find the path specified.") { + return true + } + // If we get a "not a directory" error, we can assume it's not a symlink + // otherwise, we don't know, so we 
return false + return strings.HasSuffix(err.Error(), "not a directory") } return !isSymlink } -func computeMD5(filename string) (FileFingerprint, error) { +func computeMD5ForFile(filename string) (FileFingerprint, error) { data, err := os.ReadFile(filename) if err != nil { return FileFingerprint{}, err @@ -223,8 +258,10 @@ func (f *Fingerprints) Len() int { return len(f.Entries) } +var osCreate = os.Create + func (f *Fingerprints) ToFile(ouputFile string) error { - file, err := os.Create(ouputFile) + file, err := osCreate(ouputFile) if err != nil { return err } @@ -243,6 +280,57 @@ func (f *Fingerprints) ToFile(ouputFile string) error { } -func (f *Fingerprints) Append(fingerprint FileFingerprint) { - f.Entries = append(f.Entries, fingerprint) +var filesToUnzip = []string{".jar", ".nupkg"} + +func shouldUnzip(filename string) bool { + for _, file := range filesToUnzip { + if filepath.Ext(filename) == file { + return true + } + } + + return false +} + +func inMemFingerprintingCompressedContent(filename string, exclusions []string) ([]FileFingerprint, error) { + + r, err := zip.OpenReader(filename) + if err != nil { + return nil, err + } + defer r.Close() + + fingerprints := []FileFingerprint{} + + for _, f := range r.File { + if filepath.IsAbs(f.Name) || strings.HasPrefix(f.Name, "..") { + continue + } + longFileName := filepath.Join(filename, f.Name) // #nosec + + if !shouldProcessFile(f.FileInfo(), exclusions, longFileName) { + continue + } + rc, err := f.Open() + if err != nil { + return nil, err + } + hasher := md5.New() // #nosec + _, err = io.Copy(hasher, rc) // #nosec + if err != nil { + rc.Close() + + return nil, err + } + + fingerprints = append(fingerprints, FileFingerprint{ + path: longFileName, + contentLength: int64(f.UncompressedSize64), + fingerprint: hasher.Sum(nil), + }) + + rc.Close() + } + + return fingerprints, nil } diff --git a/internal/fingerprint/fingerprint_test.go b/internal/fingerprint/fingerprint_test.go index 8319de9c..4099d658 100644 
// errorString is the message carried by the error that mockSymlink returns.
// Tests overwrite it to drive the different error branches.
var errorString = "mock error"

// mockSymlink is a stand-in for isSymlink that always fails. It ignores
// filename and returns false together with an error whose message is exactly
// the current value of errorString.
func mockSymlink(filename string) (bool, error) {
	// errors.New keeps the message verbatim; passing a variable as a format
	// string to fmt.Errorf would reinterpret any '%' it contains.
	return false, errors.New(errorString)
}
[]string{}, + mock: func() { isSymlinkFunc = mockSymlink }, + want: false, + }, + { + name: "Test with errorString: The system cannot find the path specified.", + filePath: testFile, + excludes: []string{}, + mock: func() { errorString = "The system cannot find the path specified." }, + want: true, + }, + { + name: "Test with errorString: not a directory", + filePath: testFile, + excludes: []string{}, + mock: func() { errorString = "not a directory" }, + want: true, + }, + { + name: "Test with generic error", + filePath: testFile, + excludes: []string{}, + mock: func() { errorString = "generic error" }, + want: false, + }, } - // Test Excluded - if shouldProcessFile(fileInfo, []string{"**/test.py"}, testFile) { - t.Errorf("Expected shouldProcessFile to return true for %s, but it returned false", testFile) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + defer func() { errorString = "mock error" }() + tt.mock() + fileInfo, err := os.Stat(tt.filePath) + if err != nil { + t.Fatalf("Failed to get file info for %s: %v", tt.filePath, err) + } + if got := shouldProcessFile(fileInfo, tt.excludes, tt.filePath); got != tt.want { + t.Errorf("Expected shouldProcessFile to return %v for %s, but it returned %v", tt.want, tt.filePath, got) + } + }) } + // Reset isSymlinkFunc and errorString + isSymlinkFunc = isSymlink } func TestNewFingerprinter(t *testing.T) { @@ -140,25 +198,178 @@ func TestFileFingerprintToString(t *testing.T) { func TestComputeMD5(t *testing.T) { // Test file not found - _, err := computeMD5("testdata/fingerprinter/testfile-not-found.py") + _, err := computeMD5ForFile("testdata/fingerprinter/testfile-not-found.py") assert.Error(t, err) // Test file found - entry, err := computeMD5("testdata/fingerprinter/testfile.py") + entry, err := computeMD5ForFile("testdata/fingerprinter/testfile.py") assert.NoError(t, err) entryS := fmt.Sprintf("%x", entry.fingerprint) assert.Equal(t, "72214db4e1e543018d1bafe86ea3b444", entryS) } func 
TestFingerprintsToFile(t *testing.T) { - fingerprints := Fingerprints{} - fingerprints.Entries = append(fingerprints.Entries, FileFingerprint{path: "path", contentLength: 10, fingerprint: []byte("fingerprint")}) - // Create temp dir - dir, err := os.MkdirTemp("", "test") - assert.NoError(t, err) - defer os.RemoveAll(dir) - // Write fingerprints to file - err = fingerprints.ToFile(dir + "/fingerprints.wfp") - assert.NoError(t, err) + tests := []struct { + name string + outputFile string + setupMock func() + expectedError bool + }{ + { + name: "Successful write", + outputFile: "fingerprints.wfp", + setupMock: func() {}, + expectedError: false, + }, + { + name: "Failed to create file", + setupMock: func() { + osCreate = func(name string) (*os.File, error) { + return nil, errors.New("forced error") + } + }, + outputFile: "test/fingerprints.wfp", + expectedError: true, + }, + { + name: "Failed to write to file", + setupMock: func() { + osCreate = func(name string) (*os.File, error) { + return os.Create("test/fingerprints.wfp") + } + }, + outputFile: "/invalid/path/fingerprints.wfp", + expectedError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Reset osCreate to its original function after each test + defer func() { osCreate = os.Create }() + + // Setup the mock function + tt.setupMock() + + // Create temp dir + dir, err := os.MkdirTemp("", "test") + if err != nil { + t.Fatalf("Failed to create temporary directory: %v", err) + } + defer os.RemoveAll(dir) + + // Create fingerprints + fingerprints := Fingerprints{} + fingerprints.Entries = append(fingerprints.Entries, FileFingerprint{path: "path", contentLength: 10, fingerprint: []byte("fingerprint")}) + // Write fingerprints to file + err = fingerprints.ToFile(filepath.Join(dir, tt.outputFile)) + if tt.expectedError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestShouldUnzip(t *testing.T) { + tests := []struct { + name string + filename 
string + want bool + }{ + { + name: "Should unzip .jar file", + filename: "test.jar", + want: true, + }, + { + name: "Should unzip .nupkg file", + filename: "test.nupkg", + want: true, + }, + { + name: "Should not unzip .txt file", + filename: "test.txt", + want: false, + }, + { + name: "Should not unzip .go file", + filename: "test.go", + want: false, + }, + { + name: "Should pick up .jar file in nested folder", + filename: "deep/folder/test.jar", + want: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := shouldUnzip(tt.filename); got != tt.want { + t.Errorf("shouldUnzip() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestInMemFingerprintingCompressedContent(t *testing.T) { + tests := []struct { + name string + path string + expected int + suffix string + }{ + { + name: "Jar", + path: "testdata/zipfile/jar", + expected: 5, + suffix: "log4j-api-2.18.0.jar", + }, + { + name: "Nupkg", + path: "testdata/zipfile/nupkg", + expected: 22, + suffix: "newtonsoft.json.13.0.3.nupkg", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fingerprinter := NewFingerprinter() + fingerprints, err := fingerprinter.FingerprintFiles(tt.path, []string{}) + assert.NoError(t, err) + assert.NotNil(t, fingerprints) + assert.NotEmpty(t, fingerprints) + assert.Equal(t, tt.expected, fingerprints.Len()) + lastRow := fingerprints.Entries[len(fingerprints.Entries)-1] + assert.True(t, strings.HasSuffix(lastRow.ToString(), tt.suffix)) + }) + } +} + +func TestComputeMD5ForFile(t *testing.T) { + tests := []struct { + name string + file string + wantErr bool + }{ + { + name: "Non-existent file", + file: "non_existent_file.txt", + wantErr: true, + }, + // Add more test cases as needed + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := computeMD5ForFile(tt.file) + if (err != nil) != tt.wantErr { + t.Errorf("computeMD5ForFile() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } } diff 
--git a/internal/fingerprint/testdata/zipfile/jar/log4j-api-2.18.0.jar b/internal/fingerprint/testdata/zipfile/jar/log4j-api-2.18.0.jar new file mode 100644 index 00000000..b4e933f7 Binary files /dev/null and b/internal/fingerprint/testdata/zipfile/jar/log4j-api-2.18.0.jar differ diff --git a/internal/fingerprint/testdata/zipfile/nupkg/newtonsoft.json.13.0.3.nupkg b/internal/fingerprint/testdata/zipfile/nupkg/newtonsoft.json.13.0.3.nupkg new file mode 100644 index 00000000..5829e3da Binary files /dev/null and b/internal/fingerprint/testdata/zipfile/nupkg/newtonsoft.json.13.0.3.nupkg differ