Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

programkind: return MIME type & file extension, swap magic library #507

Merged
merged 29 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
ec2fad9
pip: add known good list
tstromberg Sep 21, 2024
2c8e84c
Merge remote-tracking branch 'upstream/main'
tstromberg Sep 21, 2024
4a0ba70
Merge remote-tracking branch 'upstream/main'
tstromberg Sep 26, 2024
9726263
Merge remote-tracking branch 'upstream/main'
tstromberg Oct 2, 2024
bd14230
Merge remote-tracking branch 'upstream/main'
tstromberg Oct 2, 2024
9da0be6
Merge remote-tracking branch 'upstream/main'
tstromberg Oct 6, 2024
c562943
Merge remote-tracking branch 'upstream/main'
tstromberg Oct 7, 2024
126a8b6
Merge remote-tracking branch 'upstream/main'
tstromberg Oct 8, 2024
321e7e3
experimental filetypes work
tstromberg Oct 8, 2024
22a3b93
programkind refactor for filetype support
tstromberg Oct 8, 2024
0f4e2e7
Merge branch 'main' into filetypes
egibs Oct 8, 2024
ce33cb5
further README tuning
tstromberg Oct 8, 2024
d578da8
Add more tests
tstromberg Oct 8, 2024
364cfd2
Merge remote-tracking branch 'upstream/main'
tstromberg Oct 8, 2024
e4fab83
fix data
tstromberg Oct 8, 2024
7c25671
fix lint issues
tstromberg Oct 8, 2024
f9fc454
Add 'tiny' testcase
tstromberg Oct 8, 2024
22e1261
Handle more Python/PHP edge cases
tstromberg Oct 8, 2024
8408e1e
better error handling
tstromberg Oct 8, 2024
50231d0
Merge branch 'main' into filetypes
tstromberg Oct 8, 2024
29a2233
looser shell matching
tstromberg Oct 8, 2024
6f122e3
improve comment
tstromberg Oct 8, 2024
3eba90e
use the same old janky shell script logic that once worked
tstromberg Oct 8, 2024
a71d841
fix comment
tstromberg Oct 8, 2024
60acd0c
increase amount of header we read
tstromberg Oct 8, 2024
6d25a7f
add export
tstromberg Oct 8, 2024
337a747
Don't require a root-derived path for she-bang scripts
tstromberg Oct 8, 2024
d23b6f3
Assume profile is a script
tstromberg Oct 8, 2024
aa9e141
remove unnecessary newline
tstromberg Oct 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ require (
github.com/agext/levenshtein v1.2.3
github.com/chainguard-dev/clog v1.5.0
github.com/fatih/color v1.17.0
github.com/gabriel-vasile/mimetype v1.4.5
github.com/google/go-cmp v0.6.0
github.com/google/go-containerregistry v0.20.2
github.com/hillu/go-yara/v4 v4.3.3
github.com/liamg/magic v0.0.1
github.com/olekukonko/tablewriter v0.0.5
github.com/shirou/gopsutil/v4 v4.24.9
github.com/ulikunitz/xz v0.5.12
Expand Down Expand Up @@ -50,5 +50,6 @@ require (
github.com/vbatts/tar-split v0.11.5 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
golang.org/x/net v0.27.0 // indirect
golang.org/x/sys v0.26.0 // indirect
)
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ github.com/ebitengine/purego v0.8.0 h1:JbqvnEzRvPpxhCJzJJ2y0RbiZ8nyjccVUrSM3q+Gv
github.com/ebitengine/purego v0.8.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4=
github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI=
github.com/gabriel-vasile/mimetype v1.4.5 h1:J7wGKdGu33ocBOhGy0z653k/lFKLFDPJMG8Gql0kxn4=
github.com/gabriel-vasile/mimetype v1.4.5/go.mod h1:ibHel+/kbxn9x2407k1izTA1S81ku1z/DlgOW2QE0M4=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
Expand All @@ -40,8 +42,6 @@ github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfn
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/liamg/magic v0.0.1 h1:Ru22ElY+sCh6RvRTWjQzKKCxsEco8hE0co8n1qe7TBM=
github.com/liamg/magic v0.0.1/go.mod h1:yQkOmZZI52EA+SQ2xyHpVw8fNvTBruF873Y+Vt6S+fk=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
Expand Down Expand Up @@ -97,6 +97,8 @@ github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGC
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys=
golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand All @@ -117,7 +119,5 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EV
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0=
gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8=
49 changes: 49 additions & 0 deletions pkg/action/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,62 @@ import (
"io/fs"
"os"
"path/filepath"
"regexp"
"strings"
"sync"

"github.com/chainguard-dev/clog"
"github.com/ulikunitz/xz"
)

// archiveMap lists the file extensions (leading dot included) that
// isSupportedArchive accepts. Every listed extension maps to true.
var archiveMap = map[string]bool{
	// Compound extensions; getExt looks these up as a single unit.
	".tar.gz": true,
	".tar.xz": true,

	// Single-part extensions.
	".apk":   true,
	".bz2":   true,
	".bzip2": true,
	".gem":   true,
	".gz":    true,
	".jar":   true,
	".tar":   true,
	".tgz":   true,
	".xz":    true,
	".zip":   true,
}

// isSupportedArchive reports whether path names an archive our extractor can
// process, judged by looking up the extension getExt derives from path in
// archiveMap.
func isSupportedArchive(path string) bool {
	supported, known := archiveMap[getExt(path)]
	return known && supported
}

// getExt returns the extension of a file path
// and attempts to avoid including fragments of filenames with other dots before the extension.
func getExt(path string) string {
base := filepath.Base(path)

// Handle files with version numbers in the name
// e.g. file1.2.3.tar.gz -> .tar.gz
re := regexp.MustCompile(`\d+\.\d+\.\d+$`)
base = re.ReplaceAllString(base, "")

ext := filepath.Ext(base)

if ext != "" && strings.Contains(base, ".") {
parts := strings.Split(base, ".")
if len(parts) > 2 {
subExt := fmt.Sprintf(".%s%s", parts[len(parts)-2], ext)
if isValidExt := func(ext string) bool {
_, ok := archiveMap[ext]
return ok
}(subExt); isValidExt {
return subExt
}
}
}

return ext
}

// maxBytes is a byte limit of 1 << 29 (512 MiB).
// NOTE(review): presumably the cap on how much data the extractors will copy
// out of an archive entry — confirm against its uses.
const maxBytes = 1 << 29 // 512MB

// extractTar extracts .apk and .tar* archives.
Expand Down
73 changes: 73 additions & 0 deletions pkg/action/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,76 @@ func TestScanArchive(t *testing.T) {
t.Errorf("output mismatch: (-want +got):\n%s", diff)
}
}

// TestGetExt checks the extension getExt reports for plain names, names that
// carry a version number, names with extra dots, and names with no extension.
func TestGetExt(t *testing.T) {
	tests := []struct {
		path string
		want string
	}{
		// Plain names.
		{path: "testdata/file.apk", want: ".apk"},
		{path: "testdata/file.jar", want: ".jar"},
		{path: "testdata/file.tar", want: ".tar"},
		{path: "testdata/file.tgz", want: ".tgz"},
		{path: "testdata/file.tar.gz", want: ".tar.gz"},
		{path: "testdata/file.tar.xz", want: ".tar.xz"},
		{path: "testdata/file.zip", want: ".zip"},

		// Names carrying a version number.
		{path: "testdata/file_1.0.0", want: ""},
		{path: "testdata/file_1.0.0.apk", want: ".apk"},
		{path: "testdata/file_1.0.0.jar", want: ".jar"},
		{path: "testdata/file_1.0.0.tar", want: ".tar"},
		{path: "testdata/file_1.0.0.tgz", want: ".tgz"},
		{path: "testdata/file_1.0.0.tar.gz", want: ".tar.gz"},
		{path: "testdata/file_1.0.0.tar.xz", want: ".tar.xz"},
		{path: "testdata/file_1.0.0.zip", want: ".zip"},

		// Names with extra, non-version dots.
		{path: "testdata/file.a.b.c.tar.gz", want: ".tar.gz"},
		{path: "testdata/file_a.b.c.tar.xz", want: ".tar.xz"},
		{path: "testdata/file_a.b.0.tar", want: ".tar"},

		// No extension at all.
		{path: "testdata/file_no_ext", want: ""},
	}
	for _, tt := range tests {
		t.Run(tt.path, func(t *testing.T) {
			// %q keeps an empty result visible in the failure output.
			if got := getExt(tt.path); got != tt.want {
				t.Errorf("getExt(%q) = %q, want %q", tt.path, got, tt.want)
			}
		})
	}
}
180 changes: 0 additions & 180 deletions pkg/action/programkind.go

This file was deleted.

Loading