diff --git a/client.go b/client.go
index ea8ee736..4c53db3e 100644
--- a/client.go
+++ b/client.go
@@ -68,7 +68,7 @@ func WithPlatform(platform string) Option {
 }
 
 // GetImageFromSource returns an image from the explicitly provided source.
-func GetImageFromSource(ctx context.Context, imgStr string, source image.Source, options ...Option) (*image.Image, error) {
+func GetImageFromSource(ctx context.Context, imgStr string, source image.Source, filter image.PathFilter, options ...Option) (*image.Image, error) {
 	log.Debugf("image: source=%+v location=%+v", source, imgStr)
 
 	var cfg config
@@ -91,7 +91,7 @@ func GetImageFromSource(ctx context.Context, imgStr string, source image.Source,
 		return nil, fmt.Errorf("unable to use %s source: %w", source, err)
 	}
 
-	err = img.Read()
+	err = img.Read(filter)
 	if err != nil {
 		return nil, fmt.Errorf("could not read image: %+v", err)
 	}
@@ -170,12 +170,12 @@ func defaultPlatformIfNil(cfg *config) {
 
 // GetImage parses the user provided image string and provides an image object;
 // note: the source where the image should be referenced from is automatically inferred.
-func GetImage(ctx context.Context, userStr string, options ...Option) (*image.Image, error) {
+func GetImage(ctx context.Context, userStr string, filter image.PathFilter, options ...Option) (*image.Image, error) {
 	source, imgStr, err := image.DetectSource(userStr)
 	if err != nil {
 		return nil, err
 	}
-	return GetImageFromSource(ctx, imgStr, source, options...)
+	return GetImageFromSource(ctx, imgStr, source, filter, options...)
 }
 
 func SetLogger(logger logger.Logger) {
diff --git a/examples/basic.go b/examples/basic.go
index 1609304f..c29b8924 100644
--- a/examples/basic.go
+++ b/examples/basic.go
@@ -33,7 +33,8 @@ func main() {
 	// ./path/to.tar
 	//
 	// This will catalog the file metadata and resolve all squash trees
-	image, err := stereoscope.GetImage(ctx, os.Args[1])
+	filter := func(path string) bool { return true }
+	image, err := stereoscope.GetImage(ctx, os.Args[1], filter)
 	if err != nil {
 		panic(err)
 	}
diff --git a/pkg/image/file_catalog_test.go b/pkg/image/file_catalog_test.go
index 7685707f..b7e8efbe 100644
--- a/pkg/image/file_catalog_test.go
+++ b/pkg/image/file_catalog_test.go
@@ -222,7 +222,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) {
 	// we don't need the index itself, just the side effect on the file catalog after indexing
 	_, err := file.NewTarIndex(
 		fixtureTarFile.Name(),
-		layerTarIndexer(ft, fileCatalog, &size, nil, nil),
+		layerTarIndexer(ft, fileCatalog, &size, nil, nil, nil),
 	)
 	require.NoError(t, err)
 
@@ -389,7 +389,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) {
 	// we don't need the index itself, just the side effect on the file catalog after indexing
 	_, err := file.NewTarIndex(
 		fixtureTarFile.Name(),
-		layerTarIndexer(ft, fileCatalog, &size, nil, nil),
+		layerTarIndexer(ft, fileCatalog, &size, nil, nil, nil),
 	)
 	require.NoError(t, err)
 
@@ -493,7 +493,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) {
 	// we don't need the index itself, just the side effect on the file catalog after indexing
 	_, err := file.NewTarIndex(
 		fixtureTarFile.Name(),
-		layerTarIndexer(ft, fileCatalog, &size, nil, nil),
+		layerTarIndexer(ft, fileCatalog, &size, nil, nil, nil),
 	)
 	require.NoError(t, err)
 
@@ -605,7 +605,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) {
 	// we don't need the index itself, just the side effect on the file catalog after indexing
 	_, err := file.NewTarIndex(
 		fixtureTarFile.Name(),
-		layerTarIndexer(ft, fileCatalog, &size, nil, nil),
+		layerTarIndexer(ft, fileCatalog, &size, nil, nil, nil),
 	)
 	require.NoError(t, err)
 
@@ -706,7 +706,7 @@ func TestFileCatalog_GetBasenames(t *testing.T) {
 	// we don't need the index itself, just the side effect on the file catalog after indexing
 	_, err := file.NewTarIndex(
 		fixtureTarFile.Name(),
-		layerTarIndexer(ft, fileCatalog, &size, nil, nil),
+		layerTarIndexer(ft, fileCatalog, &size, nil, nil, nil),
 	)
 	require.NoError(t, err)
 
diff --git a/pkg/image/image.go b/pkg/image/image.go
index 188d546a..e429c17c 100644
--- a/pkg/image/image.go
+++ b/pkg/image/image.go
@@ -185,7 +185,7 @@ func (i *Image) applyOverrideMetadata() error {
 
 // Read parses information from the underlying image tar into this struct. This includes image metadata, layer
 // metadata, layer file trees, and layer squash trees (which implies the image squash tree).
-func (i *Image) Read() error {
+func (i *Image) Read(filter PathFilter) error {
 	var layers = make([]*Layer, 0)
 	var err error
 	i.Metadata, err = readImageMetadata(i.image)
@@ -215,7 +215,7 @@ func (i *Image) Read() error {
 
 	for idx, v1Layer := range v1Layers {
 		layer := NewLayer(v1Layer)
-		err := layer.Read(fileCatalog, i.Metadata, idx, i.contentCacheDir)
+		err := layer.Read(fileCatalog, i.Metadata, idx, i.contentCacheDir, filter)
 		if err != nil {
 			return err
 		}
diff --git a/pkg/image/layer.go b/pkg/image/layer.go
index f803bca6..4cf79f22 100644
--- a/pkg/image/layer.go
+++ b/pkg/image/layer.go
@@ -24,6 +24,9 @@ import (
 
 const SingularitySquashFSLayer = "application/vnd.sylabs.sif.layer.v1.squashfs"
 
+// PathFilter reports whether the given path should be included in the index; a nil filter includes every path.
+type PathFilter = func(path string) bool
+
 // Layer represents a single layer within a container image.
 type Layer struct {
 	// layer is the raw layer metadata and content provider from the GCR lib
@@ -80,7 +83,7 @@ func (l *Layer) uncompressedTarCache(uncompressedLayersCacheDir string) (string,
 
 // Read parses information from the underlying layer tar into this struct. This includes layer metadata, the layer
 // file tree, and the layer squash tree.
-func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncompressedLayersCacheDir string) error {
+func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncompressedLayersCacheDir string, filter PathFilter) error {
 	var err error
 	tree := filetree.New()
 	l.Tree = tree
@@ -113,7 +116,7 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp
 
 		l.indexedContent, err = file.NewTarIndex(
 			tarFilePath,
-			layerTarIndexer(tree, l.fileCatalog, &l.Metadata.Size, l, monitor),
+			layerTarIndexer(tree, l.fileCatalog, &l.Metadata.Size, l, monitor, filter),
 		)
 		if err != nil {
 			return fmt.Errorf("failed to read layer=%q tar : %w", l.Metadata.Digest, err)
@@ -128,9 +131,9 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp
 
 		// Walk the more efficient walk if we're blessed with an io.ReaderAt.
 		if ra, ok := r.(io.ReaderAt); ok {
-			err = file.WalkSquashFS(ra, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor))
+			err = file.WalkSquashFS(ra, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor, filter))
 		} else {
-			err = file.WalkSquashFSFromReader(r, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor))
+			err = file.WalkSquashFSFromReader(r, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor, filter))
 		}
 		if err != nil {
 			return fmt.Errorf("failed to walk layer=%q: %w", l.Metadata.Digest, err)
@@ -205,7 +208,7 @@ func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference
 	return refs, nil
 }
 
-func layerTarIndexer(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.TarIndexVisitor {
+func layerTarIndexer(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual, filter PathFilter) file.TarIndexVisitor {
 	builder := filetree.NewBuilder(ft, fileCatalog.Index)
 
 	return func(index file.TarIndexEntry) error {
@@ -220,6 +223,10 @@ func layerTarIndexer(ft filetree.Writer, fileCatalog *FileCatalog, size *int64,
 		}()
 		metadata := file.NewMetadata(entry.Header, contents)
 
+		if filter != nil && !filter(metadata.Path) {
+			return nil
+		}
+
 		// note: the tar header name is independent of surrounding structure, for example, there may be a tar header entry
 		// for /some/path/to/file.txt without any entries to constituent paths (/some, /some/path, /some/path/to ).
 		// This is ok, and the FileTree will account for this by automatically adding directories for non-existing
@@ -247,10 +254,14 @@ func layerTarIndexer(ft filetree.Writer, fileCatalog *FileCatalog, size *int64,
 	}
 }
 
-func squashfsVisitor(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.SquashFSVisitor {
+func squashfsVisitor(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual, filter PathFilter) file.SquashFSVisitor {
 	builder := filetree.NewBuilder(ft, fileCatalog.Index)
 
 	return func(fsys fs.FS, path string, d fs.DirEntry) error {
+		if filter != nil && !filter(path) {
+			return nil
+		}
+
 		ff, err := fsys.Open(path)
 		if err != nil {
 			return err
diff --git a/pkg/image/sif/provider_test.go b/pkg/image/sif/provider_test.go
index 0c3901e1..add7298f 100644
--- a/pkg/image/sif/provider_test.go
+++ b/pkg/image/sif/provider_test.go
@@ -37,6 +37,8 @@ func TestSingularityImageProvider_Provide(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			p := NewProviderFromPath(tt.path, file.NewTempDirGenerator(""))
 
+			filter := func(path string) bool { return true }
+
 			i, err := p.Provide(context.Background(), tt.userMetadata...)
 			t.Cleanup(func() { _ = i.Cleanup() })
 
@@ -45,7 +47,7 @@ func TestSingularityImageProvider_Provide(t *testing.T) {
 			}
 
 			if err == nil {
-				if err := i.Read(); err != nil {
+				if err := i.Read(filter); err != nil {
 					t.Fatal(err)
 				}
 			}
diff --git a/pkg/imagetest/image_fixtures.go b/pkg/imagetest/image_fixtures.go
index 51f31862..9946b9c5 100644
--- a/pkg/imagetest/image_fixtures.go
+++ b/pkg/imagetest/image_fixtures.go
@@ -60,8 +60,9 @@ func PrepareFixtureImage(t testing.TB, source, name string) string {
 
 func GetFixtureImage(t testing.TB, source, name string) *image.Image {
 	request := PrepareFixtureImage(t, source, name)
+	filter := func(path string) bool { return true }
 
-	i, err := stereoscope.GetImage(context.TODO(), request)
+	i, err := stereoscope.GetImage(context.TODO(), request, filter)
 	require.NoError(t, err)
 	t.Cleanup(func() {
 		require.NoError(t, i.Cleanup())
@@ -110,8 +111,9 @@ func skopeoCopyDockerArchiveToPath(t testing.TB, dockerArchivePath, destination
 
 func getFixtureImageFromTar(t testing.TB, tarPath string) *image.Image {
 	request := fmt.Sprintf("docker-archive:%s", tarPath)
+	filter := func(path string) bool { return true }
 
-	i, err := stereoscope.GetImage(context.TODO(), request)
+	i, err := stereoscope.GetImage(context.TODO(), request, filter)
 	require.NoError(t, err)
 
 	t.Cleanup(func() {
diff --git a/test/integration/fixture_image_simple_test.go b/test/integration/fixture_image_simple_test.go
index c4323a3a..47133797 100644
--- a/test/integration/fixture_image_simple_test.go
+++ b/test/integration/fixture_image_simple_test.go
@@ -145,12 +145,13 @@ func BenchmarkSimpleImage_GetImage(b *testing.B) {
 			continue
 		}
 		request := imagetest.PrepareFixtureImage(b, c.source, "image-simple")
+		filter := func(path string) bool { return true }
 
 		b.Run(c.source, func(b *testing.B) {
 			var bi *image.Image
 			for i := 0; i < b.N; i++ {
 
-				bi, err = stereoscope.GetImage(context.TODO(), request)
+				bi, err = stereoscope.GetImage(context.TODO(), request, filter)
 				b.Cleanup(func() {
 					require.NoError(b, bi.Cleanup())
 				})
diff --git a/test/integration/mime_type_detection_test.go b/test/integration/mime_type_detection_test.go
index eb088685..d4ef7de9 100644
--- a/test/integration/mime_type_detection_test.go
+++ b/test/integration/mime_type_detection_test.go
@@ -13,8 +13,9 @@ import (
 
 func TestContentMIMETypeDetection(t *testing.T) {
 	request := imagetest.PrepareFixtureImage(t, "docker-archive", "image-simple")
+	filter := func(path string) bool { return true }
 
-	img, err := stereoscope.GetImage(context.TODO(), request)
+	img, err := stereoscope.GetImage(context.TODO(), request, filter)
 	assert.NoError(t, err)
 	t.Cleanup(stereoscope.Cleanup)
 
diff --git a/test/integration/oci_registry_source_test.go b/test/integration/oci_registry_source_test.go
index feab94d9..0b450f81 100644
--- a/test/integration/oci_registry_source_test.go
+++ b/test/integration/oci_registry_source_test.go
@@ -32,13 +32,15 @@ func TestOciRegistrySourceMetadata(t *testing.T) {
 	imgStr := "anchore/test_images"
 	ref := fmt.Sprintf("%s@%s", imgStr, digest)
 
-	img, err := stereoscope.GetImage(context.TODO(), "registry:"+ref)
+	filter := func(path string) bool { return true }
+
+	img, err := stereoscope.GetImage(context.TODO(), "registry:"+ref, filter)
 	require.NoError(t, err)
 	t.Cleanup(func() {
 		require.NoError(t, img.Cleanup())
 	})
 
-	require.NoError(t, img.Read())
+	require.NoError(t, img.Read(filter))
 
 	assert.Len(t, img.Metadata.RepoDigests, 1)
 	assert.Equal(t, "index.docker.io/"+ref, img.Metadata.RepoDigests[0])
diff --git a/test/integration/platform_test.go b/test/integration/platform_test.go
index 1206e62e..148375fb 100644
--- a/test/integration/platform_test.go
+++ b/test/integration/platform_test.go
@@ -85,7 +85,8 @@ func TestPlatformSelection(t *testing.T) {
 				tt.expectedErr = require.NoError
 			}
 			platformOpt := stereoscope.WithPlatform(platform)
-			img, err := stereoscope.GetImageFromSource(context.TODO(), imageName, tt.source, platformOpt)
+			filter := func(path string) bool { return true }
+			img, err := stereoscope.GetImageFromSource(context.TODO(), imageName, tt.source, filter, platformOpt)
 			tt.expectedErr(t, err)
 			require.NotNil(t, img)
 
@@ -113,9 +114,10 @@ func TestDigestThatNarrowsToOnePlatform(t *testing.T) {
 			source: image.OciRegistrySource,
 		},
 	}
+	filter := func(path string) bool { return true }
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			img, err := stereoscope.GetImageFromSource(context.TODO(), imageStrWithDigest, tt.source)
+			img, err := stereoscope.GetImageFromSource(context.TODO(), imageStrWithDigest, tt.source, filter)
 			assert.NoError(t, err)
 			assertArchAndOs(t, img, "linux", "s390x")
 		})
@@ -123,7 +125,8 @@ func TestDigestThatNarrowsToOnePlatform(t *testing.T) {
 }
 
 func TestDefaultPlatformWithOciRegistry(t *testing.T) {
-	img, err := stereoscope.GetImageFromSource(context.TODO(), "busybox:1.31", image.OciRegistrySource)
+	filter := func(path string) bool { return true }
+	img, err := stereoscope.GetImageFromSource(context.TODO(), "busybox:1.31", image.OciRegistrySource, filter)
 	require.NoError(t, err)
 	assertArchAndOs(t, img, "linux", runtime.GOARCH)
 }