Skip to content

Commit

Permalink
ignore minor parsing error when reading dpkg status files (#786)
Browse files Browse the repository at this point in the history
* ignore minor parsing error when reading dpkg status files

helps with #733

Question: should we add a smarter parser to guess approximate installed-size
value?

Signed-off-by: Jonas Galvão Xavier <[email protected]>

* add datasize lib to help dpkg parsing

added unit tests to expand coverage of dpkg parsing

Signed-off-by: Jonas Galvão Xavier <[email protected]>

* drop parse error

added unit tests to handleNewKeyValue

Signed-off-by: Jonas Galvão Xavier <[email protected]>

* don't return parsing errors from dpkg

Signed-off-by: Jonas Galvão Xavier <[email protected]>

* go mod tidy

Signed-off-by: Jonas Galvão Xavier <[email protected]>

* test higher level functions

Signed-off-by: Jonas Galvão Xavier <[email protected]>

* return parsing err to let cataloger handle it

Signed-off-by: Jonas Galvão Xavier <[email protected]>

* feedback changes

Signed-off-by: Jonas Galvão Xavier <[email protected]>

* ignore key parsing error

log warning with relevant context

Signed-off-by: Jonas Galvão Xavier <[email protected]>

* go mod tidy

Signed-off-by: Jonas Galvão Xavier <[email protected]>

* add context info to log lines

simpler error assertion

Signed-off-by: Jonas Galvão Xavier <[email protected]>

* use error.As to assert error in chain

Signed-off-by: Jonas Galvão Xavier <[email protected]>
  • Loading branch information
jonasagx authored Feb 17, 2022
1 parent e6b5872 commit 4b16737
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 16 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ require (
github.com/gookit/color v1.2.7
github.com/hashicorp/go-multierror v1.1.0
github.com/jinzhu/copier v0.3.2
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect
github.com/mholt/archiver/v3 v3.5.1
github.com/mitchellh/go-homedir v1.1.0
github.com/mitchellh/hashstructure/v2 v2.0.2
Expand Down Expand Up @@ -85,7 +86,6 @@ require (
github.com/mattn/go-colorable v0.0.9 // indirect
github.com/mattn/go-isatty v0.0.6 // indirect
github.com/mattn/go-runewidth v0.0.7 // indirect
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect
github.com/nwaples/rardecode v1.1.0 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.0.2 // indirect
Expand Down
3 changes: 2 additions & 1 deletion syft/pkg/cataloger/deb/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ func (c *Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []arti
pkgs, err := parseDpkgStatus(dbContents)
internal.CloseAndLogError(dbContents, dbLocation.VirtualPath)
if err != nil {
return nil, nil, fmt.Errorf("unable to catalog dpkg package=%+v: %w", dbLocation.RealPath, err)
log.Warnf("dpkg cataloger: unable to catalog package=%+v: %w", dbLocation.RealPath, err)
continue
}

for i := range pkgs {
Expand Down
1 change: 0 additions & 1 deletion syft/pkg/cataloger/deb/cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
)

func TestDpkgCataloger(t *testing.T) {

tests := []struct {
name string
sources map[string][]string
Expand Down
15 changes: 8 additions & 7 deletions syft/pkg/cataloger/deb/parse_dpkg_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ import (
"fmt"
"io"
"regexp"
"strconv"
"strings"

"github.com/anchore/syft/internal"

"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg"
"github.com/dustin/go-humanize"
"github.com/mitchellh/mapstructure"
)

Expand Down Expand Up @@ -133,7 +133,8 @@ func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) {
var val interface{}
key, val, err = handleNewKeyValue(line)
if err != nil {
return nil, err
log.Warnf("parsing dpkg status: extracting key-value from line: %s err: %v", line, err)
continue
}

if _, ok := dpkgFields[key]; ok {
Expand All @@ -154,21 +155,21 @@ func extractSourceVersion(source string) (string, string) {
}

// handleNewKeyValue parse a new key-value pair from the given unprocessed line
func handleNewKeyValue(line string) (string, interface{}, error) {
func handleNewKeyValue(line string) (key string, val interface{}, err error) {
if i := strings.Index(line, ":"); i > 0 {
var key = strings.TrimSpace(line[0:i])
key = strings.TrimSpace(line[0:i])
// mapstruct cant handle "-"
key = strings.ReplaceAll(key, "-", "")
val := strings.TrimSpace(line[i+1:])

// further processing of values based on the key that was discovered
switch key {
case "InstalledSize":
numVal, err := strconv.Atoi(val)
s, err := humanize.ParseBytes(val)
if err != nil {
return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err)
}
return key, numVal, nil
return key, int(s), nil
default:
return key, val, nil
}
Expand Down
161 changes: 155 additions & 6 deletions syft/pkg/cataloger/deb/parse_dpkg_status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@ package deb

import (
"bufio"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"testing"

"github.com/anchore/syft/syft/file"
"github.com/stretchr/testify/assert"

"github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep"
Expand All @@ -20,11 +25,13 @@ func compareEntries(t *testing.T, left, right pkg.DpkgMetadata) {

func TestSinglePackage(t *testing.T) {
tests := []struct {
name string
expected pkg.DpkgMetadata
name string
expected pkg.DpkgMetadata
fixturePath string
}{
{
name: "Test Single Package",
name: "Test Single Package",
fixturePath: filepath.Join("test-fixtures", "status", "single"),
expected: pkg.DpkgMetadata{
Package: "apt",
Source: "apt-dev",
Expand Down Expand Up @@ -68,11 +75,22 @@ func TestSinglePackage(t *testing.T) {
},
},
},
}
{
name: "parse storage notation",
fixturePath: filepath.Join("test-fixtures", "status", "installed-size-4KB"),
expected: pkg.DpkgMetadata{
Package: "apt",
Source: "apt-dev",
Version: "1.8.2",
Architecture: "amd64",
InstalledSize: 4000,
Maintainer: "APT Development Team <[email protected]>",
},
}}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
file, err := os.Open("test-fixtures/status/single")
file, err := os.Open(test.fixturePath)
if err != nil {
t.Fatal("Unable to read test_fixtures/single: ", err)
}
Expand Down Expand Up @@ -204,7 +222,6 @@ func TestMultiplePackages(t *testing.T) {
}

func TestSourceVersionExtract(t *testing.T) {

tests := []struct {
name string
input string
Expand Down Expand Up @@ -242,3 +259,135 @@ func TestSourceVersionExtract(t *testing.T) {
})
}
}

func assertAs(expected error) assert.ErrorAssertionFunc {
return func(t assert.TestingT, err error, i ...interface{}) bool {
return assert.ErrorAs(t, err, &expected)
}
}

func Test_parseDpkgStatus(t *testing.T) {
tests := []struct {
name string
input string
want []pkg.Package
wantErr assert.ErrorAssertionFunc
}{
{
name: "no more packages",
input: `Package: apt`,
wantErr: assert.NoError,
},
{
name: "duplicated key",
input: `Package: apt
Package: apt-get
`,
wantErr: assertAs(errors.New("duplicate key discovered: Package")),
},
{
name: "no match for continuation",
input: ` Package: apt
`,
wantErr: assertAs(errors.New("no match for continuation: line: ' Package: apt'")),
},
{
name: "find keys",
input: `Package: apt
Status: install ok installed
Installed-Size: 10kib
`,
want: []pkg.Package{
{
Name: "apt",
Type: "deb",
MetadataType: "DpkgMetadata",
Metadata: pkg.DpkgMetadata{
Package: "apt",
InstalledSize: 10240,
Files: []pkg.DpkgFileRecord{},
},
},
},
wantErr: assert.NoError,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
r := bufio.NewReader(strings.NewReader(tt.input))
got, err := parseDpkgStatus(r)
tt.wantErr(t, err, fmt.Sprintf("parseDpkgStatus"))
assert.Equal(t, tt.want, got)
})
}
}

func Test_handleNewKeyValue(t *testing.T) {
tests := []struct {
name string
line string
wantKey string
wantVal interface{}
wantErr assert.ErrorAssertionFunc
}{
{
name: "cannot parse field",
line: "blabla",
wantErr: assertAs(errors.New("cannot parse field from line: 'blabla'")),
},
{
name: "parse field",
line: "key: val",
wantKey: "key",
wantVal: "val",
wantErr: assert.NoError,
},
{
name: "parse installed size",
line: "InstalledSize: 128",
wantKey: "InstalledSize",
wantVal: 128,
wantErr: assert.NoError,
},
{
name: "parse installed kib size",
line: "InstalledSize: 1kib",
wantKey: "InstalledSize",
wantVal: 1024,
wantErr: assert.NoError,
},
{
name: "parse installed kb size",
line: "InstalledSize: 1kb",
wantKey: "InstalledSize",
wantVal: 1000,
wantErr: assert.NoError,
},
{
name: "parse installed-size mb",
line: "Installed-Size: 1 mb",
wantKey: "InstalledSize",
wantVal: 1000000,
wantErr: assert.NoError,
},
{
name: "fail parsing installed-size",
line: "Installed-Size: 1bla",
wantKey: "",
wantErr: assertAs(fmt.Errorf("unhandled size name: %s", "bla")),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotKey, gotVal, err := handleNewKeyValue(tt.line)
tt.wantErr(t, err, fmt.Sprintf("handleNewKeyValue(%v)", tt.line))

assert.Equalf(t, tt.wantKey, gotKey, "handleNewKeyValue(%v)", tt.line)
assert.Equalf(t, tt.wantVal, gotVal, "handleNewKeyValue(%v)", tt.line)
})
}
}
31 changes: 31 additions & 0 deletions syft/pkg/cataloger/deb/test-fixtures/status/installed-size-4KB
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Package: apt
Status: install ok installed
Priority: required
Section: admin
Installed-Size: 4KB
Maintainer: APT Development Team <[email protected]>
Architecture: amd64
Version: 1.8.2
Source: apt-dev
Replaces: apt-transport-https (<< 1.5~alpha4~), apt-utils (<< 1.3~exp2~)
Provides: apt-transport-https (= 1.8.2)
Depends: adduser, gpgv | gpgv2 | gpgv1, debian-archive-keyring, libapt-pkg5.0 (>= 1.7.0~alpha3~), libc6 (>= 2.15), libgcc1 (>= 1:3.0), libgnutls30 (>= 3.6.6), libseccomp2 (>= 1.0.1), libstdc++6 (>= 5.2)
Recommends: ca-certificates
Suggests: apt-doc, aptitude | synaptic | wajig, dpkg-dev (>= 1.17.2), gnupg | gnupg2 | gnupg1, powermgmt-base
Breaks: apt-transport-https (<< 1.5~alpha4~), apt-utils (<< 1.3~exp2~), aptitude (<< 0.8.10)
Conffiles:
Description: commandline package manager
This package provides commandline tools for searching and
managing as well as querying information about packages
as a low-level access to all features of the libapt-pkg library.
.
These include:
* apt-get for retrieval of packages and information about them
from authenticated sources and for installation, upgrade and
removal of packages together with their dependencies
* apt-cache for querying available information about installed
as well as installable packages
* apt-cdrom to use removable media as a source for packages
* apt-config as an interface to the configuration settings
* apt-key as an interface to manage authentication keys

0 comments on commit 4b16737

Please sign in to comment.