diff --git a/internal/dynamicanalysis/analysis.go b/internal/dynamicanalysis/analysis.go index 03e61cab..f6cc629d 100644 --- a/internal/dynamicanalysis/analysis.go +++ b/internal/dynamicanalysis/analysis.go @@ -64,7 +64,7 @@ func Run(ctx context.Context, sb sandbox.Sandbox, command string, args []string, } defer l.Close() - straceResult, err := strace.Parse(l, straceLogger) + straceResult, err := strace.Parse(ctx, l, straceLogger) if err != nil { return resultError, fmt.Errorf("strace parsing failed (%w)", err) } diff --git a/internal/featureflags/features.go b/internal/featureflags/features.go index c6ccbb3f..4fe61859 100644 --- a/internal/featureflags/features.go +++ b/internal/featureflags/features.go @@ -17,4 +17,10 @@ var ( // which may uncover extra malicious behaviour. The names of executed functions, // methods and classes are logged to a file. CodeExecution = new("CodeExecution", false) + + // StraceDebugLogging enables verbose logging of strace parsing during dynamic analysis. + // This feature can only be used in the analysis image, and if enabled, the -strace-logs-dir + // flag must also be set. When enabled, the log files are then accessible via an explicit + // docker mount or copy of the specified directory from the container to the host filesystem. + StraceDebugLogging = new("StraceDebugLogging", false) ) diff --git a/internal/log/log.go b/internal/log/log.go index c23423fb..23500609 100644 --- a/internal/log/log.go +++ b/internal/log/log.go @@ -29,6 +29,10 @@ func (e LoggingEnv) String() string { const ( LoggingEnvDev LoggingEnv = "dev" LoggingEnvProd LoggingEnv = "prod" + + // StraceDebugLogDir is a hardcoded directory that can be used to store + // the strace debug log, if the strace debug logging feature is enabled + StraceDebugLogDir = "/straceLogs" ) var ( diff --git a/internal/strace/strace.go b/internal/strace/strace.go index 94d7dd1d..162204ab 100644 --- a/internal/strace/strace.go +++ b/internal/strace/strace.go @@ -2,6 +2,7 @@ package strace import ( "bufio" + "context" "crypto/sha256" "encoding/hex" "encoding/json" @@ -154,7 +155,7 @@ func (r *Result) recordFileAccess(file string, read, write, del bool) { r.files[file].Delete = r.files[file].Delete || del } -func (r *Result) recordFileWrite(file string, writeBuffer []byte, bytesWritten int64, logger *slog.Logger) error { +func (r *Result) recordFileWrite(file string, writeBuffer []byte, bytesWritten int64) error { r.recordFileAccess(file, false, true, false) if !featureflags.WriteFileContents.Enabled() { // Abort writing file contents when feature is disabled. @@ -220,7 +221,7 @@ func (r *Result) parseEnterSyscall(syscall, args string, logger *slog.Logger) er writeBuffer = args[firstQuoteIndex+1 : lastQuoteIndex] } logger.Debug("write", "path", path, "size", bytesWritten) - return r.recordFileWrite(path, []byte(writeBuffer), bytesWritten, logger) + return r.recordFileWrite(path, []byte(writeBuffer), bytesWritten) } return nil } @@ -323,9 +324,9 @@ func (r *Result) parseExitSyscall(syscall, args string, logger *slog.Logger) err return nil } -// Parse reads an strace and collects the files, sockets and commands that were -// accessed. -func Parse(r io.Reader, logger *slog.Logger) (*Result, error) { +// Parse reads the output from strace and collects the files, sockets and commands that +// were accessed. debugLogger can be used to log verbose information about strace parsing. +func Parse(ctx context.Context, r io.Reader, debugLogger *slog.Logger) (*Result, error) { result := &Result{ files: make(map[string]*FileInfo), sockets: make(map[string]*SocketInfo), @@ -345,18 +346,18 @@ func Parse(r io.Reader, logger *slog.Logger) (*Result, error) { if match != nil { if match[2] == "E" { // Analyze entry events. - if err := result.parseEnterSyscall(match[3], match[4], logger); errors.Is(err, ErrParseFailure) { + if err := result.parseEnterSyscall(match[3], match[4], debugLogger); errors.Is(err, ErrParseFailure) { // Log parsing errors and continue. - logger.Warn("Failed to parse entry syscall", "error", err) + slog.WarnContext(ctx, "Failed to parse entry syscall", "error", err) } else if err != nil { return nil, err } } if match[2] == "X" { // Analyze exit events. - if err := result.parseExitSyscall(match[3], match[4], logger); errors.Is(err, ErrParseFailure) { + if err := result.parseExitSyscall(match[3], match[4], debugLogger); errors.Is(err, ErrParseFailure) { // Log parsing errors and continue. - logger.Warn("Failed to parse exit syscall", "error", err) + slog.WarnContext(ctx, "Failed to parse exit syscall", "error", err) } else if err != nil { return nil, err } diff --git a/internal/strace/strace_test.go b/internal/strace/strace_test.go index 8d2a9e20..25059f5a 100644 --- a/internal/strace/strace_test.go +++ b/internal/strace/strace_test.go @@ -1,6 +1,7 @@ package strace_test import ( + "context" "fmt" "io" "log/slog" @@ -12,12 +13,12 @@ import ( "github.com/ossf/package-analysis/internal/utils" ) -var nopLogger = slog.New(slog.NewTextHandler(io.Discard, &slog.HandlerOptions{})) +var nopLogger = slog.New(slog.NewTextHandler(io.Discard, nil)) func TestIgnoreEntryLogs(t *testing.T) { input := "I1203 05:29:21.585712 173 strace.go:625] [ 2] python3 E creat(0x7f015d7865d0 /tmp/abctest, 0o600)" r := strings.NewReader(input) - res, err := strace.Parse(r, nopLogger) + res, err := strace.Parse(context.Background(), r, nopLogger) if err != nil || res == nil { t.Errorf(`Parse(r) = %v, %v, want _, nil`, res, err) } @@ -36,7 +37,7 @@ func TestParseFileReadThenCreate(t *testing.T) { } r := strings.NewReader(input) - res, err := strace.Parse(r, nopLogger) + res, err := strace.Parse(context.Background(), r, nopLogger) if err != nil || res == nil { t.Errorf(`Parse(r) = %v, %v, want _, nil`, res, err) } @@ -65,7 +66,7 @@ func TestParseFileWriteMultipleWritesToSameFile(t *testing.T) { WriteInfo: writeInfoWantArray} r := strings.NewReader(input) - res, err := strace.Parse(r, nopLogger) + res, err := strace.Parse(context.Background(), r, nopLogger) if err != nil || res == nil { t.Errorf(`Parse(r) = %v, %v, want _, nil`, res, err) } @@ -106,7 +107,7 @@ func TestParseFileWritesToDifferentFiles(t *testing.T) { want := []strace.FileInfo{firstFileWant, secondFileWant} r := strings.NewReader(input) - res, err := strace.Parse(r, nopLogger) + res, err := strace.Parse(context.Background(), r, nopLogger) if err != nil || res == nil { t.Errorf(`Parse(r) = %v, %v, want _, nil`, res, err) } @@ -134,7 +135,7 @@ func TestParseFileWriteWithZeroBytesWritten(t *testing.T) { } r := strings.NewReader(input) - res, err := strace.Parse(r, nopLogger) + res, err := strace.Parse(context.Background(), r, nopLogger) if err != nil || res == nil { t.Errorf(`Parse(r) = %v, %v, want _, nil`, res, err) } @@ -314,7 +315,7 @@ func TestParseFilesOneEntry(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { r := strings.NewReader(test.input) - res, err := strace.Parse(r, nopLogger) + res, err := strace.Parse(context.Background(), r, nopLogger) if err != nil || res == nil { t.Errorf(`Parse(r) = %v, %v, want _, nil`, res, err) } @@ -331,7 +332,7 @@ func TestParseIgnoredSockets(t *testing.T) { "I1206 02:02:36.989375 205 strace.go:622] [ 2] gem X bind(0x5 socket:[1], 0x7f414ed92cf8 {Family: AF_NETLINK, PortID: 0, Groups: 0}, 0xc) = 0x0 (16.276µs)\n" + "I1206 02:02:36.990646 205 strace.go:622] [ 2] gem X connect(0x5 socket:[2], 0x7f414ed93080 {Family: AF_UNSPEC, family addr format unknown}, 0x10) = 0x0 (8.598µs)\n" r := strings.NewReader(input) - res, err := strace.Parse(r, nopLogger) + res, err := strace.Parse(context.Background(), r, nopLogger) if err != nil || res == nil { t.Errorf(`Parse(r) = %v, %v, want _, nil`, res, err) } @@ -408,7 +409,7 @@ func TestParseSocketsOneEntry(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { r := strings.NewReader(test.input) - res, err := strace.Parse(r, nopLogger) + res, err := strace.Parse(context.Background(), r, nopLogger) if err != nil || res == nil { t.Errorf(`Parse(r) = %v, %v, want _, nil`, res, err) } @@ -426,7 +427,7 @@ func TestReallyLongLogLine(t *testing.T) { input := fmt.Sprintf(inputTmpl, strings.Repeat(part, 1000)) r := strings.NewReader(input) - res, err := strace.Parse(r, nopLogger) + res, err := strace.Parse(context.Background(), r, nopLogger) if err != nil || res == nil { t.Fatalf(`Parse(r) = %v, %v, want _, nil`, res, err) } diff --git a/internal/worker/rundynamic.go b/internal/worker/rundynamic.go index 0f8ff5bf..8f87c344 100644 --- a/internal/worker/rundynamic.go +++ b/internal/worker/rundynamic.go @@ -2,9 +2,14 @@ package worker import ( "context" + "fmt" + "io" "log/slog" + "os" + "path/filepath" "runtime" "strconv" + "strings" "time" "github.com/ossf/package-analysis/internal/analysis" @@ -148,11 +153,58 @@ func RunDynamicAnalysis(ctx context.Context, pkg *pkgmanager.Pkg, sbOpts []sandb return result, nil } +// openStraceDebugLogFile creates and returns the file to be used for debug logging of strace parsing +// during a dynamic analysis phase. The file is created with the given filename in log.StraceDebugLogDir. +// It is truncated on open (so a unique name per analysis phase should be used) and is the caller's +// responsibility to close. If strace debug logging is disabled, or some error occurs during creation, +// a nil file pointer is returned, and nothing more need be done by the caller. +func openStraceDebugLogFile(ctx context.Context, name string) *os.File { + if !featureflags.StraceDebugLogging.Enabled() { + return nil + } + + var logDir = log.StraceDebugLogDir + if err := os.MkdirAll(logDir, 0o777); err != nil { + slog.WarnContext(ctx, "could not create directory for strace debug logs", "path", logDir, "error", err) + } + + logPath := filepath.Join(logDir, name) + if logFile, err := os.Create(logPath); err != nil { + slog.WarnContext(ctx, "could not create strace debug log file", "path", logPath, "error", err) + return nil + } else { + return logFile + } +} + +func straceDebugLogFilename(pkg *pkgmanager.Pkg, phase analysisrun.DynamicPhase) string { + filename := fmt.Sprintf("%s-%s", pkg.Ecosystem(), pkg.Name()) + if pkg.Version() != "" { + filename += "-" + pkg.Version() + } + filename += fmt.Sprintf("-%s-strace.log", phase) + + // Protect against e.g. a package name that contains a slash. + // This may cause name collisions, but it's probably fine for a debug log + return strings.ReplaceAll(filename, string(os.PathSeparator), "-") +} + func runDynamicAnalysisPhase(ctx context.Context, pkg *pkgmanager.Pkg, sb sandbox.Sandbox, analysisCmd string, phase analysisrun.DynamicPhase, result *DynamicAnalysisResult) error { phaseCtx := log.ContextWithAttrs(ctx, log.Label("phase", string(phase))) startTime := time.Now() args := dynamicanalysis.MakeAnalysisArgs(pkg, phase) - phaseResult, err := dynamicanalysis.Run(ctx, sb, analysisCmd, args, log.LoggerWithContext(slog.Default(), ctx)) + + straceLogger := slog.New(slog.NewTextHandler(io.Discard, nil)) // default is nop logger + if logFile := openStraceDebugLogFile(phaseCtx, straceDebugLogFilename(pkg, phase)); logFile != nil { + slog.InfoContext(phaseCtx, "strace debug logging enabled") + defer logFile.Close() + + enableDebug := &slog.HandlerOptions{Level: slog.LevelDebug} + straceLogger = slog.New(log.NewContextLogHandler(slog.NewTextHandler(logFile, enableDebug))) + straceLogger.InfoContext(phaseCtx, "running dynamic analysis") + } + + phaseResult, err := dynamicanalysis.Run(phaseCtx, sb, analysisCmd, args, straceLogger) result.LastRunPhase = phase runDuration := time.Since(startTime) slog.InfoContext(phaseCtx, "Dynamic analysis phase finished", diff --git a/scripts/run_analysis.sh b/scripts/run_analysis.sh index bda9aa1b..23c8021c 100755 --- a/scripts/run_analysis.sh +++ b/scripts/run_analysis.sh @@ -5,6 +5,7 @@ STATIC_RESULTS_DIR=${STATIC_RESULTS_DIR:-"/tmp/staticResults"} FILE_WRITE_RESULTS_DIR=${FILE_WRITE_RESULTS_DIR:-"/tmp/writeResults"} ANALYZED_PACKAGES_DIR=${ANALYZED_PACKAGES_DIR:-"/tmp/analyzedPackages"} LOGS_DIR=${LOGS_DIR:-"/tmp/dockertmp"} +STRACE_LOGS_DIR=${STRACE_LOGS_DIR:-"/tmp/straceLogs"} # for pretty printing LINE="-----------------------------------------" @@ -45,6 +46,7 @@ function print_results_dirs { echo "File write results: $FILE_WRITE_RESULTS_DIR" echo "Analyzed package saved: $ANALYZED_PACKAGES_DIR" echo "Debug logs: $LOGS_DIR" + echo "Strace logs: $STRACE_LOGS_DIR" } @@ -81,12 +83,13 @@ while [[ $i -lt $# ]]; do HELP=1 ;; "-local") + # need to create a mount to pass the package archive to the docker image LOCAL=1 i=$((i+1)) # -m preserves invalid/non-existent paths (which will be detected below) PKG_PATH=$(realpath -m "${args[$i]}") if [[ -z "$PKG_PATH" ]]; then - echo "--local specified but no package path given" + echo "-local specified but no package path given" exit 255 fi PKG_FILE=$(basename "$PKG_PATH") @@ -150,7 +153,7 @@ elif is_mount_type overlay /var/lib; then fi -DOCKER_MOUNTS=("-v" "$CONTAINER_MOUNT_DIR:/var/lib/containers" "-v" "$RESULTS_DIR:/results" "-v" "$STATIC_RESULTS_DIR:/staticResults" "-v" "$FILE_WRITE_RESULTS_DIR:/writeResults" "-v" "$LOGS_DIR:/tmp" "-v" "$ANALYZED_PACKAGES_DIR:/analyzedPackages") +DOCKER_MOUNTS=("-v" "$CONTAINER_MOUNT_DIR:/var/lib/containers" "-v" "$RESULTS_DIR:/results" "-v" "$STATIC_RESULTS_DIR:/staticResults" "-v" "$FILE_WRITE_RESULTS_DIR:/writeResults" "-v" "$LOGS_DIR:/tmp" "-v" "$ANALYZED_PACKAGES_DIR:/analyzedPackages" "-v" "$STRACE_LOGS_DIR:/straceLogs") ANALYSIS_IMAGE=gcr.io/ossf-malware-analysis/analysis @@ -222,6 +225,7 @@ mkdir -p "$STATIC_RESULTS_DIR" mkdir -p "$FILE_WRITE_RESULTS_DIR" mkdir -p "$ANALYZED_PACKAGES_DIR" mkdir -p "$LOGS_DIR" +mkdir -p "$STRACE_LOGS_DIR" docker "${DOCKER_OPTS[@]}" "${DOCKER_MOUNTS[@]}" "$ANALYSIS_IMAGE" "${ANALYSIS_ARGS[@]}" @@ -246,6 +250,7 @@ echo $LINE rmdir --ignore-fail-on-non-empty "$FILE_WRITE_RESULTS_DIR" rmdir --ignore-fail-on-non-empty "$ANALYZED_PACKAGES_DIR" rmdir --ignore-fail-on-non-empty "$LOGS_DIR" + rmdir --ignore-fail-on-non-empty "$STRACE_LOGS_DIR" fi echo $LINE