diff --git a/go.mod b/go.mod index a5716a3f9..7759bc95f 100644 --- a/go.mod +++ b/go.mod @@ -6,10 +6,10 @@ require ( github.com/agext/levenshtein v1.2.3 github.com/chainguard-dev/clog v1.5.0 github.com/fatih/color v1.17.0 + github.com/gabriel-vasile/mimetype v1.4.5 github.com/google/go-cmp v0.6.0 github.com/google/go-containerregistry v0.20.2 github.com/hillu/go-yara/v4 v4.3.3 - github.com/liamg/magic v0.0.1 github.com/olekukonko/tablewriter v0.0.5 github.com/shirou/gopsutil/v4 v4.24.9 github.com/ulikunitz/xz v0.5.12 @@ -50,5 +50,6 @@ require ( github.com/vbatts/tar-split v0.11.5 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect + golang.org/x/net v0.27.0 // indirect golang.org/x/sys v0.26.0 // indirect ) diff --git a/go.sum b/go.sum index 281e5f7ec..455af5e6b 100644 --- a/go.sum +++ b/go.sum @@ -23,6 +23,8 @@ github.com/ebitengine/purego v0.8.0 h1:JbqvnEzRvPpxhCJzJJ2y0RbiZ8nyjccVUrSM3q+Gv github.com/ebitengine/purego v0.8.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= +github.com/gabriel-vasile/mimetype v1.4.5 h1:J7wGKdGu33ocBOhGy0z653k/lFKLFDPJMG8Gql0kxn4= +github.com/gabriel-vasile/mimetype v1.4.5/go.mod h1:ibHel+/kbxn9x2407k1izTA1S81ku1z/DlgOW2QE0M4= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= @@ -40,8 +42,6 @@ github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfn github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod 
h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/liamg/magic v0.0.1 h1:Ru22ElY+sCh6RvRTWjQzKKCxsEco8hE0co8n1qe7TBM= -github.com/liamg/magic v0.0.1/go.mod h1:yQkOmZZI52EA+SQ2xyHpVw8fNvTBruF873Y+Vt6S+fk= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= @@ -97,6 +97,8 @@ github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGC github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= +golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -117,7 +119,5 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EV gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= -gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= gotest.tools/v3 v3.0.3/go.mod 
h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= diff --git a/pkg/action/archive.go b/pkg/action/archive.go index d9ba6e85b..b4b53c7fb 100644 --- a/pkg/action/archive.go +++ b/pkg/action/archive.go @@ -12,6 +12,7 @@ import ( "io/fs" "os" "path/filepath" + "regexp" "strings" "sync" @@ -19,6 +20,53 @@ import ( "github.com/ulikunitz/xz" ) +var archiveMap = map[string]bool{ + ".apk": true, + ".bz2": true, + ".bzip2": true, + ".gem": true, + ".gz": true, + ".jar": true, + ".tar.gz": true, + ".tar.xz": true, + ".tar": true, + ".tgz": true, + ".xz": true, + ".zip": true, +} + +// trailingVersion matches a dotted version number at the very end of a file name (e.g. "1.0.0"); compiled once at package scope instead of on every getExt call. +var trailingVersion = regexp.MustCompile(`\d+\.\d+\.\d+$`) + +// isSupportedArchive returns whether a path can be processed by our archive extractor. +func isSupportedArchive(path string) bool { + return archiveMap[getExt(path)] +} + +// getExt returns the extension of a file path +// and attempts to avoid including fragments of filenames with other dots before the extension. +func getExt(path string) string { + base := filepath.Base(path) + + // Strip a version number at the very end of the name so it is not mistaken for an extension, + // e.g. file_1.0.0 -> file_ (no extension); names such as file_1.0.0.tar.gz are left untouched here. + base = trailingVersion.ReplaceAllString(base, "") + + ext := filepath.Ext(base) + + if ext != "" && strings.Contains(base, ".") { + parts := strings.Split(base, ".") + if len(parts) > 2 { + subExt := fmt.Sprintf(".%s%s", parts[len(parts)-2], ext) + if archiveMap[subExt] { // prefer a known compound extension such as .tar.gz over the bare .gz + return subExt + } + } + } + + return ext +} + const maxBytes = 1 << 29 // 512MB // extractTar extracts .apk and .tar* archives. 
diff --git a/pkg/action/archive_test.go b/pkg/action/archive_test.go index 4765b7c10..1734b7e43 100644 --- a/pkg/action/archive_test.go +++ b/pkg/action/archive_test.go @@ -249,3 +249,77 @@ func TestScanArchive(t *testing.T) { t.Errorf("output mismatch: (-want +got):\n%s", diff) } } + +// TestGetExt verifies getExt on plain, compound, and version-suffixed file names. +func TestGetExt(t *testing.T) { + tests := []struct { + path string + want string + }{ + { + path: "testdata/file.apk", + want: ".apk", + }, { + path: "testdata/file.jar", + want: ".jar", + }, { + path: "testdata/file.tar", + want: ".tar", + }, { + path: "testdata/file.tgz", + want: ".tgz", + }, { + path: "testdata/file.tar.gz", + want: ".tar.gz", + }, { + path: "testdata/file.tar.xz", + want: ".tar.xz", + }, { + path: "testdata/file.zip", + want: ".zip", + }, { + path: "testdata/file_1.0.0", + want: "", + }, { + path: "testdata/file_1.0.0.apk", + want: ".apk", + }, { + path: "testdata/file_1.0.0.jar", + want: ".jar", + }, { + path: "testdata/file_1.0.0.tar", + want: ".tar", + }, { + path: "testdata/file_1.0.0.tgz", + want: ".tgz", + }, { + path: "testdata/file_1.0.0.tar.gz", + want: ".tar.gz", + }, { + path: "testdata/file_1.0.0.tar.xz", + want: ".tar.xz", + }, { + path: "testdata/file_1.0.0.zip", + want: ".zip", + }, { + path: "testdata/file.a.b.c.tar.gz", + want: ".tar.gz", + }, { + path: "testdata/file_a.b.c.tar.xz", + want: ".tar.xz", + }, { + path: "testdata/file_a.b.0.tar", + want: ".tar", + }, { + path: "testdata/file_no_ext", + want: "", + }, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + if got := getExt(tt.path); got != tt.want { + t.Errorf("getExt() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/action/programkind.go b/pkg/action/programkind.go deleted file mode 100644 index c1e704ed6..000000000 --- a/pkg/action/programkind.go +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright 2024 Chainguard, Inc. 
-// SPDX-License-Identifier: Apache-2.0 - -package action - -import ( - "context" - "errors" - "fmt" - "io" - "log/slog" - "os" - "path/filepath" - "regexp" - "strings" - - "github.com/chainguard-dev/clog" - "github.com/liamg/magic" -) - -var archiveMap = map[string]bool{ - ".apk": true, - ".bz2": true, - ".bzip2": true, - ".gem": true, - ".gz": true, - ".jar": true, - ".tar.gz": true, - ".tar.xz": true, - ".tar": true, - ".tgz": true, - ".xz": true, - ".zip": true, -} - -// map from extensions to program kinds. -var extMap = map[string]string{ - ".7z": "", - ".asm": "", - ".bat": "Batch file", - ".c": "C source", - ".cpp": "C++ source", - ".dll": "Windows Dynamic Library", - ".cron": "crontab", - ".crontab": "crontab", - ".expect": "Expect script", - ".fish": "Fish script", - ".gem": "Ruby gem", - ".go": "Go source", - ".gz": "Gzip compressed", - ".h": "C header", - ".html": "", - ".jar": "Java program", - ".java": "Java source", - ".js": "Javascript", - ".json": "Javascript", - ".md": "", - ".php": "PHP file", - ".pl": "PERL script", - ".ps1": "Powershell", - ".py": "Python script", - ".pyc": "Python script (compiled)", - ".rb": "Ruby script", - ".rs": "Rust source", - ".scpt": "compiled AppleScript", - ".scptd": "compiled AppleScript", - ".service": "systemd", - ".sh": "Shell script", - ".ts": "Typescript", - ".yaml": "", - ".yara": "", - ".yml": "", - ".zsh": "Zshell script", -} - -// programKind tries to identify if a path is a program. 
-func programKind(ctx context.Context, path string) string { - var header [263]byte - logger := clog.FromContext(ctx).With("path", path) - f, err := os.Open(path) - if err != nil { - logger.Error("os.Open", slog.Any("error", err)) - return "" - } - defer f.Close() - - desc := "" - headerString := "" - n, err := io.ReadFull(f, header[:]) - if err == nil || errors.Is(err, io.ErrUnexpectedEOF) { - kind, err := magic.LookupSync(header[:n]) - if err == nil { - desc = kind.Description - } - headerString = string(header[:n]) - } - - // TODO: Is it safe to log unsanitized file stuff? - logger.Debug("magic", slog.String("desc", desc), slog.String("header", headerString), slog.Any("err", err)) - - if found, kind := byExtension(path); found { - return kind - } - - d := strings.ToLower(desc) - switch { - // By magic - case strings.Contains(d, "executable") || - strings.Contains(d, "mach-o") || - strings.Contains(d, "script"): - return desc - // By header string - case strings.Contains(headerString, "import "): - return "Python script" - case strings.HasPrefix(headerString, "#!/bin/ash") || - strings.HasPrefix(headerString, "#!/bin/bash") || - strings.HasPrefix(headerString, "#!/bin/fish") || - strings.HasPrefix(headerString, "#!/bin/sh") || - strings.HasPrefix(headerString, "#!/bin/zsh") || - strings.Contains(headerString, `echo "`) || - strings.Contains(headerString, `if [`) || - strings.Contains(headerString, `grep `) || - strings.Contains(headerString, "if !"): - return "Shell script" - case strings.HasPrefix(headerString, "#!"): - return "script" - case strings.Contains(headerString, "#include <"): - return "C Program" - // By filename or extension - case strings.Contains(path, "systemd"): - return "systemd" - case strings.Contains(path, ".elf"): - return "Linux ELF binary" - case strings.Contains(path, ".xcoff"): - return "XCOFF program" - case strings.Contains(path, ".dylib"): - return "macOS dynamic library" - case strings.HasSuffix(path, "profile"): - return "Shell 
script" - // the magic library gets these wrong - case strings.HasSuffix(path, ".json"): - return "" - } - return "" -} - -// byExtension returns true, and descriptive file type if the extension is -// known, and false otherwise. -func byExtension(path string) (bool, string) { - ret, ok := extMap[filepath.Ext(path)] - return ok, ret -} - -// getExt returns the extension of a file path -// and attempts to avoid including fragments of filenames with other dots before the extension. -func getExt(path string) string { - base := filepath.Base(path) - - // Handle files with version numbers in the name - // e.g. file1.2.3.tar.gz -> .tar.gz - re := regexp.MustCompile(`\d+\.\d+\.\d+$`) - base = re.ReplaceAllString(base, "") - - ext := filepath.Ext(base) - - if ext != "" && strings.Contains(base, ".") { - parts := strings.Split(base, ".") - if len(parts) > 2 { - subExt := fmt.Sprintf(".%s%s", parts[len(parts)-2], ext) - if isValidExt := func(ext string) bool { - _, ok := archiveMap[ext] - return ok - }(subExt); isValidExt { - return subExt - } - } - } - - return ext -} diff --git a/pkg/action/programkind_test.go b/pkg/action/programkind_test.go deleted file mode 100644 index 11ac0d753..000000000 --- a/pkg/action/programkind_test.go +++ /dev/null @@ -1,212 +0,0 @@ -// Copyright 2024 Chainguard, Inc. 
-// SPDX-License-Identifier: Apache-2.0 - -package action - -import ( - "fmt" - "testing" - - "github.com/chainguard-dev/clog/slogtest" -) - -func TestProgramKindMagic(_ *testing.T) { - // nop for now -} - -func TestProgramStringMatch(t *testing.T) { - tests := []struct { - filename string - want string - }{{ - filename: "python", - want: "Python script", - }, { - filename: "shell", - want: "Shell script", - }, { - filename: "short", - want: "", - }, { - filename: "empty", - want: "", - }, { - filename: "rando", // generated with : `head -c 1024 pkg/action/testdata/rando` - }, { - filename: "juttu", - want: "", - }} - for _, tt := range tests { - t.Run(tt.filename, func(t *testing.T) { - ctx := slogtest.Context(t) - got := programKind(ctx, fmt.Sprintf("testdata/%s", tt.filename)) - if got != tt.want { - t.Errorf("programKind() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestProgramKindExtensions(t *testing.T) { - tests := []struct { - filename string - want string - notFound bool // true if the file extension is not found in the map - }{{ - filename: "applescript.scpt", - want: "compiled AppleScript", - }, { - filename: "applescript.scptd", - want: "compiled AppleScript", - }, { - filename: "shell.sh", - want: "Shell script", - }, { - filename: "ruby.rb", - want: "Ruby script", - }, { - filename: "python.py", - want: "Python script", - }, { - filename: "perl.pl", - want: "PERL script", - }, { - filename: "yara.yara", - want: "", - }, { - filename: "expect.expect", - want: "Expect script", - }, { - filename: "php.php", - want: "PHP file", - }, { - filename: "html.html", - want: "", - }, { - filename: "javascript.js", - want: "Javascript", - }, { - filename: "typescript.ts", - want: "Typescript", - }, { - filename: "7z.7z", - want: "", - }, { - filename: "json.json", - want: "Javascript", - }, { - filename: "yaml.yml", - want: "", - }, { - filename: "yaml.yaml", - want: "", - }, { - filename: "java.java", - want: "Java source", - }, { - filename: 
"java.jar", - want: "Java program", - }, { - filename: "asm.asm", - want: "", - }, { - filename: "systemd.service", - want: "systemd", - }, { - filename: "crontab.cron", - want: "crontab", - }, { - filename: "crontab.crontab", - want: "crontab", - }, { - filename: "c.c", - want: "C source", - }, { - filename: "juttu.juttu", - notFound: true, - }} - for _, tt := range tests { - t.Run(tt.filename, func(t *testing.T) { - exists, kind := byExtension(tt.filename) - if exists != !tt.notFound { - t.Errorf("byExtension(%s) exists = %v, want %v", tt.filename, exists, !tt.notFound) - } - if kind != tt.want { - t.Errorf("byExtension(%s) kind = %v, want %v", tt.filename, kind, tt.want) - } - }) - } -} - -func TestGetExt(t *testing.T) { - tests := []struct { - path string - want string - }{ - { - path: "testdata/file.apk", - want: ".apk", - }, { - path: "testdata/file.jar", - want: ".jar", - }, { - path: "testdata/file.tar", - want: ".tar", - }, { - path: "testdata/file.tgz", - want: ".tgz", - }, { - path: "testdata/file.tar.gz", - want: ".tar.gz", - }, { - path: "testdata/file.tar.xz", - want: ".tar.xz", - }, { - path: "testdata/file.zip", - want: ".zip", - }, { - path: "testdata/file_1.0.0", - want: "", - }, { - path: "testdata/file_1.0.0.apk", - want: ".apk", - }, { - path: "testdata/file_1.0.0.jar", - want: ".jar", - }, { - path: "testdata/file_1.0.0.tar", - want: ".tar", - }, { - path: "testdata/file_1.0.0.tgz", - want: ".tgz", - }, { - path: "testdata/file_1.0.0.tar.gz", - want: ".tar.gz", - }, { - path: "testdata/file_1.0.0.tar.xz", - want: ".tar.xz", - }, { - path: "testdata/file_1.0.0.zip", - want: ".zip", - }, { - path: "testdata/file.a.b.c.tar.gz", - want: ".tar.gz", - }, { - path: "testdata/file_a.b.c.tar.xz", - want: ".tar.xz", - }, { - path: "testdata/file_a.b.0.tar", - want: ".tar", - }, { - path: "testdata/file_no_ext", - want: "", - }, - } - for _, tt := range tests { - t.Run(tt.path, func(t *testing.T) { - if got := getExt(tt.path); got != tt.want { - 
t.Errorf("getExt() = %v, want %v", got, tt.want) - } - }) - } -} diff --git a/pkg/action/scan.go b/pkg/action/scan.go index 69c355a35..c97529b1d 100644 --- a/pkg/action/scan.go +++ b/pkg/action/scan.go @@ -18,6 +18,7 @@ import ( "github.com/chainguard-dev/clog" "github.com/chainguard-dev/malcontent/pkg/compile" "github.com/chainguard-dev/malcontent/pkg/malcontent" + "github.com/chainguard-dev/malcontent/pkg/programkind" "github.com/chainguard-dev/malcontent/pkg/render" "github.com/chainguard-dev/malcontent/pkg/report" @@ -90,15 +91,21 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF logger := clog.FromContext(ctx) var mrs yara.MatchRules logger = logger.With("path", path) - kind := programKind(ctx, path) - logger = logger.With("kind", kind) - logger.Info("scanning") - if !c.IncludeDataFiles && kind == "" { - // logger.Info("not a program") - return &malcontent.FileReport{Skipped: "data file", Path: path}, nil + + mime := "" + kind, err := programkind.File(path) + if err != nil { + logger.Errorf("file type failure: %s: %s", path, err) + } + if kind != nil { + mime = kind.MIME } + if !c.IncludeDataFiles && kind == nil { + logger.Infof("skipping %s [%s]: data file or empty", path, mime) + return &malcontent.FileReport{Skipped: "data file or empty", Path: path}, nil + } + logger = logger.With("mime", mime) - logger.Debug("calling YARA ScanFile") f, err := os.Open(path) if err != nil { return nil, err @@ -137,11 +144,6 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF return &fr, nil } -// isSupportedArchive returns whether a path can be processed by our archive extractor. -func isSupportedArchive(path string) bool { - return archiveMap[getExt(path)] -} - // errIfMatch generates the right error if a match is encountered. 
func errIfHitOrMiss(frs *sync.Map, kind string, scanPath string, errIfHit bool, errIfMiss bool) error { var ( diff --git a/pkg/programkind/programkind.go b/pkg/programkind/programkind.go new file mode 100644 index 000000000..965668da8 --- /dev/null +++ b/pkg/programkind/programkind.go @@ -0,0 +1,158 @@ +// Copyright 2024 Chainguard, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package programkind + +import ( + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/gabriel-vasile/mimetype" +) + +// file extension to MIME type, if it's a good scanning target. +var supportedKind = map[string]string{ + "7z": "", + "asm": "", + "bash": "application/x-bsh", + "bat": "application/bat", + "bin": "application/octet-stream", + "c": "text/x-c", + "cc": "text/x-c", + "com": "application/octet-stream", + "cpp": "text/x-c", + "cron": "text/x-cron", + "crontab": "text/x-crontab", + "csh": "application/x-csh", + "cxx": "text/x-c", + "dll": "application/octet-stream", + "dylib": "application/x-sharedlib", + "elf": "application/x-elf", + "exe": "application/octet-stream", + "expect": "text/x-expect", + "fish": "text/x-fish", + "go": "text/x-go", + "h": "text/x-h", + "hh": "text/x-h", + "html": "", + "java": "text/x-java", + "js": "application/javascript", + "json": "application/json", + "lnk": "application/x-ms-shortcut", + "lua": "text/x-lua", + "macho": "application/x-mach-binary", + "md": "", + "o": "application/octet-stream", + "php": "text/x-php", + "pl": "text/x-perl", + "pm": "text/x-script.perl-module", + "ps1": "text/x-powershell", + "py": "text/x-python", + "pyc": "application/x-python-code", + "rb": "text/x-ruby", + "rs": "text/x-rust", + "script": "text/x-generic-script", + "scpt": "application/x-applescript", + "scptd": "application/x-applescript", + "service": "text/x-systemd", + "sh": "application/x-sh", + "so": "application/x-sharedlib", + "ts": "application/typescript", + "yaml": "", + "yara": "", + "yml": "", + "zsh": "application/x-zsh", 
+} + +type FileType struct { + Ext string // extension without the leading dot (as set by makeFileType) + MIME string // MIME type string reported for the file +} + +func makeFileType(path string, ext string, mime string) *FileType { + ext = strings.TrimPrefix(ext, ".") + if supportedKind[ext] == "" { + return nil + } + + // fix mimetype bug that defaults elf binaries to x-sharedlib + if mime == "application/x-sharedlib" && !strings.Contains(path, ".so") { + return Path(".elf") + } + + if strings.Contains(mime, "application") || strings.Contains(mime, "text/x-") || strings.Contains(mime, "executable") { + return &FileType{MIME: mime, Ext: ext} + } + + return nil +} + +// File detects what kind of program this file might be. +func File(path string) (*FileType, error) { + // first strategy: mimetype + mtype, err := mimetype.DetectFile(path) + if err == nil { + if ft := makeFileType(path, mtype.Extension(), mtype.String()); ft != nil { + return ft, nil + } + } + + // second strategy: path (extension, mostly) + if mtype := Path(path); mtype != nil { + return mtype, nil + } + + // read file header + var hdr [256]byte + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("open: %w", err) + } + defer f.Close() + + _, err = io.ReadFull(f, hdr[:]) + if err != nil && !errors.Is(err, io.ErrUnexpectedEOF) { + return nil, fmt.Errorf("read: %w", err) + } + + // final strategy: DIY matching where mimetype is too strict. 
+ s := string(hdr[:]) + switch { + case hdr[0] == '\x7f' && hdr[1] == 'E' && hdr[2] == 'L' && hdr[3] == 'F': // all four ELF magic bytes must match + return Path(".elf"), nil + case strings.Contains(s, " + * @author Tomas V.V.Cox + * @copyright 1997-2009 The Authors + * @license http://opensource.org/licenses/bsd-license.php New BSD License + * @link http://pear.php.net/package/PEAR + */ + +/** + * @nodep Gtk + */ +//the space is needed for windows include paths with trailing backslash +// http://pear.php.net/bugs/bug.php?id=19482 +if ('/opt/homebrew/Cellar/php/8.3.12/share/php/pear ' != '@'.'include_path'.'@ ') { + ini_set('include_path', trim('/opt/homebrew/Cellar/php/8.3.12/share/php/pear '). PATH_SEPARATOR . get_include_path()); + $raw = false; +} else { + // this is a raw, uninstalled pear, either a cvs checkout, or php distro + ini_set('include_path', __DIR__ . PATH_SEPARATOR . get_include_path()); + $raw = true; +} +define('PEAR_RUNTYPE', 'pecl'); +require_once 'pearcmd.php'; +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * indent-tabs-mode: nil + * mode: php + * End: + */ +// vim600:syn=php + +?> diff --git a/pkg/programkind/testdata/snmpd b/pkg/programkind/testdata/snmpd new file mode 100755 index 000000000..07c90f31c --- /dev/null +++ b/pkg/programkind/testdata/snmpd @@ -0,0 +1,2 @@ +#!/bin/sh +echo "hello" diff --git a/pkg/programkind/testdata/test.pl b/pkg/programkind/testdata/test.pl new file mode 100755 index 000000000..94118b0ff --- /dev/null +++ b/pkg/programkind/testdata/test.pl @@ -0,0 +1,5 @@ +#!/usr/bin/perl -w +# this is a test +use strict; +use Getopt::Std; +getopts('cf:h'); diff --git a/pkg/programkind/testdata/test.sh b/pkg/programkind/testdata/test.sh new file mode 100644 index 000000000..2281ebed2 --- /dev/null +++ b/pkg/programkind/testdata/test.sh @@ -0,0 +1,2 @@ +#!/bin/bash -x +echo "hello-bash" diff --git a/pkg/programkind/testdata/tiny b/pkg/programkind/testdata/tiny new file mode 100644 index 000000000..44bc3981f Binary files /dev/null and 
b/pkg/programkind/testdata/tiny differ diff --git a/test_data/windows/2024.black_basta/dropper.lnk.simple b/test_data/windows/2024.black_basta/dropper.lnk.simple index e69de29bb..4083981c7 100644 --- a/test_data/windows/2024.black_basta/dropper.lnk.simple +++ b/test_data/windows/2024.black_basta/dropper.lnk.simple @@ -0,0 +1,2 @@ +# windows/2024.black_basta/dropper.lnk +process/chdir