From 0f94ba9cffed51df2521dc8cecd68622a38c0445 Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Wed, 28 Jun 2023 23:55:25 -0500 Subject: [PATCH] Add CSV output for validation errors and further streamline formatting code path (#43) * Assure txt format still prints schema errors with quiet flag Signed-off-by: Matt Rutkowski * rename error type normalization function to reduce output size Signed-off-by: Matt Rutkowski * rename error type normalization function to reduce output size Signed-off-by: Matt Rutkowski * rename error type normalization function to reduce output size Signed-off-by: Matt Rutkowski * Assure all validate tests output valid JSON where applicable Signed-off-by: Matt Rutkowski * Support csv formatted output for validation errors Signed-off-by: Matt Rutkowski * Fix G104 linter complaint Signed-off-by: Matt Rutkowski * Fix G104 linter complaint Signed-off-by: Matt Rutkowski --------- Signed-off-by: Matt Rutkowski --- .vscode/settings.json | 1 + cmd/license_list.go | 4 +- cmd/validate.go | 34 ++++---- cmd/validate_format.go | 182 +++++++++++++++++++++++++++++++---------- cmd/validate_test.go | 34 ++++++-- 5 files changed, 187 insertions(+), 68 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 72f77f2a..9b60ea8a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -40,6 +40,7 @@ "gojsonschema", "gomod", "GTPL", + "hashstructure", "HBOM", "hokaccha", "HSQLDB", diff --git a/cmd/license_list.go b/cmd/license_list.go index 0ee50247..8e29f832 100644 --- a/cmd/license_list.go +++ b/cmd/license_list.go @@ -281,7 +281,7 @@ func DisplayLicenseListJson(output io.Writer) { } json, _ := log.FormatInterfaceAsJson(lc) - // Note: JSON data files MUST ends in a newline s as this is a POSIX standard + // Note: JSON data files MUST end in a newline as this is a POSIX standard fmt.Fprintf(output, "%s\n", json) } @@ -330,7 +330,7 @@ func DisplayLicenseListCSV(output io.Writer) (err error) { lc.License.Text.Content) if errWrite
:= w.Write(currentRow); errWrite != nil { - return getLogger().Errorf("error writing to output (%v): %s", currentRow, err) + return getLogger().Errorf("error writing to output (%v): %s", currentRow, errWrite) } } } diff --git a/cmd/validate.go b/cmd/validate.go index bf0b44a1..223944b5 100644 --- a/cmd/validate.go +++ b/cmd/validate.go @@ -57,7 +57,7 @@ const ( ) var VALIDATE_SUPPORTED_ERROR_FORMATS = MSG_VALIDATE_FLAG_ERR_FORMAT + - strings.Join([]string{FORMAT_TEXT, FORMAT_JSON}, ", ") + " (default: txt)" + strings.Join([]string{FORMAT_TEXT, FORMAT_JSON, FORMAT_CSV}, ", ") + " (default: txt)" // limits const ( @@ -150,8 +150,9 @@ func validateCmdImpl(cmd *cobra.Command, args []string) error { return nil } -// Normalize error/normalizeValidationErrorTypes from the Validate() function -func normalizeValidationErrorTypes(document *schema.Sbom, valid bool, err error) { +// Normalize ErrorTypes from the Validate() function +// Note: this function name should not be changed +func validationError(document *schema.Sbom, valid bool, err error) { // Consistently display errors before exiting if err != nil { @@ -185,7 +186,8 @@ func Validate(output io.Writer, persistentFlags utils.PersistentCommandFlags, va // use function closure to assure consistent error output based upon error type defer func() { if err != nil { - normalizeValidationErrorTypes(document, valid, err) + // normalize the error output to console + validationError(document, valid, err) } }() @@ -280,7 +282,7 @@ func Validate(output io.Writer, persistentFlags utils.PersistentCommandFlags, va getLogger().Infof("SBOM valid against JSON schema: `%t`", result.Valid()) valid = result.Valid() - // Catch general errors from the validation module itself and pass them on' + // Catch general errors from the validation package/library itself and display them if errValidate != nil { // we force result to INVALID as any errors from the library means // we could NOT actually confirm the input documents validity @@ -297,19 
+299,21 @@ func Validate(output io.Writer, persistentFlags utils.PersistentCommandFlags, va schemaErrors) // TODO: de-duplicate errors (e.g., array item not "unique"...) - var formattedErrors string - switch persistentFlags.OutputFormat { + format := persistentFlags.OutputFormat + switch format { case FORMAT_JSON: - // Note: JSON data files MUST ends in a newline s as this is a POSIX standard - formattedErrors = FormatSchemaErrors(schemaErrors, validateFlags, FORMAT_JSON) - // getLogger().Debugf("%s", formattedErrors) - fmt.Fprintf(output, "%s", formattedErrors) - case FORMAT_TEXT: fallthrough + case FORMAT_CSV: + fallthrough + case FORMAT_TEXT: + // Note: we no longer add the formatted errors to the actual error "detail" field; + // since BOMs can have large numbers of errors. The new method is to allow + // the user to control the error result output (e.g., file, detail, etc.) via flags + FormatSchemaErrors(output, schemaErrors, validateFlags, format) default: - // Format error results and append to InvalidSBOMError error "details" - formattedErrors = FormatSchemaErrors(schemaErrors, validateFlags, FORMAT_TEXT) - errInvalid.Details = formattedErrors + // Notify caller that we are defaulting to "txt" format + getLogger().Warningf(MSG_WARN_INVALID_FORMAT, format, FORMAT_TEXT) + FormatSchemaErrors(output, schemaErrors, validateFlags, FORMAT_TEXT) } return INVALID, document, schemaErrors, errInvalid diff --git a/cmd/validate_format.go b/cmd/validate_format.go index 781262b6..9cc8b389 100644 --- a/cmd/validate_format.go +++ b/cmd/validate_format.go @@ -19,7 +19,9 @@ package cmd // "github.com/iancoleman/orderedmap" import ( + "encoding/csv" "fmt" + "io" "strconv" "strings" @@ -30,16 +32,17 @@ import ( ) const ( + ERROR_DETAIL_KEY_DATA_TYPE = "type" ERROR_DETAIL_KEY_FIELD = "field" ERROR_DETAIL_KEY_CONTEXT = "context" ERROR_DETAIL_KEY_VALUE = "value" - ERROR_DETAIL_KEY_DATA_TYPE = "type" ERROR_DETAIL_KEY_VALUE_TYPE_ARRAY = "array" ERROR_DETAIL_KEY_VALUE_INDEX = "index" 
ERROR_DETAIL_KEY_VALUE_ITEM = "item" ERROR_DETAIL_KEY_VALUE_DESCRIPTION = "description" ERROR_DETAIL_ARRAY_ITEM_INDEX_I = "i" ERROR_DETAIL_ARRAY_ITEM_INDEX_J = "j" + ERROR_DETAIL_CONTEXT_EMPTY = "" ) const ( @@ -65,21 +68,28 @@ const ( MSG_WARN_INVALID_FORMAT = "invalid format. error results not supported for `%s` format; defaulting to `%s` format..." ) -type ValidationResultFormatter struct { - Results []ValidationResultFormat +var VALIDATION_ERROR_TITLES = []string{ + ERROR_DETAIL_KEY_DATA_TYPE, + ERROR_DETAIL_KEY_FIELD, + ERROR_DETAIL_KEY_CONTEXT, + ERROR_DETAIL_KEY_VALUE_DESCRIPTION, } +// Holds resources (e.g., components, services) declared license(s) +//var errorResultMap = slicemultimap.New() + // JsonContext is a linked-list of JSON key strings -type ValidationResultFormat struct { +type ValidationErrorResult struct { + ResultError gojsonschema.ResultError // read only + hashMap *orderedmap.OrderedMap resultMap *orderedmap.OrderedMap valuesMap *orderedmap.OrderedMap - ResultError gojsonschema.ResultError - Context *gojsonschema.JsonContext `json:"context"` // jsonErrorMap["context"] = resultError.Context() + Context *gojsonschema.JsonContext `json:"context"` // resultError.Context() } -func NewValidationErrResult(resultError gojsonschema.ResultError) (validationErrResult *ValidationResultFormat) { +func NewValidationErrorResult(resultError gojsonschema.ResultError) (validationErrResult *ValidationErrorResult) { // Prepare values that are optionally output as JSON - validationErrResult = &ValidationResultFormat{ + validationErrResult = &ValidationErrorResult{ ResultError: resultError, } // Prepare for JSON output by adding all required fields to our ordered map @@ -88,24 +98,33 @@ func NewValidationErrResult(resultError gojsonschema.ResultError) (validationErr validationErrResult.resultMap.Set(ERROR_DETAIL_KEY_FIELD, resultError.Field()) if context := resultError.Context(); context != nil { validationErrResult.resultMap.Set(ERROR_DETAIL_KEY_CONTEXT, 
resultError.Context().String()) + } else { + validationErrResult.resultMap.Set(ERROR_DETAIL_KEY_CONTEXT, ERROR_DETAIL_CONTEXT_EMPTY) } validationErrResult.resultMap.Set(ERROR_DETAIL_KEY_VALUE_DESCRIPTION, resultError.Description()) return } -func (validationErrResult *ValidationResultFormat) MarshalJSON() (marshalled []byte, err error) { +func (validationErrResult *ValidationErrorResult) MarshalJSON() (marshalled []byte, err error) { return validationErrResult.resultMap.MarshalJSON() } -func (result *ValidationResultFormat) Format(flags utils.ValidateCommandFlags) { +func (validationErrResult *ValidationErrorResult) HashResultError() { + fmt.Printf("re:=%v", validationErrResult.ResultError) + validationErrResult.hashMap.Set(ERROR_DETAIL_KEY_DATA_TYPE, validationErrResult.ResultError.Type()) + validationErrResult.hashMap.Set(ERROR_DETAIL_KEY_CONTEXT, validationErrResult.ResultError.Context().String()) + validationErrResult.hashMap.Set(ERROR_DETAIL_KEY_VALUE, validationErrResult.ResultError.Value()) +} + +func (result *ValidationErrorResult) MapResultError(flags utils.ValidateCommandFlags) { // Conditionally, add optional values as requested (via flags) if flags.ShowErrorValue { result.resultMap.Set(ERROR_DETAIL_KEY_VALUE, result.ResultError.Value()) } } -func (result *ValidationResultFormat) FormatItemsMustBeUniqueError(flags utils.ValidateCommandFlags) { +func (result *ValidationErrorResult) MapItemsMustBeUniqueError(flags utils.ValidateCommandFlags) { // For this error type, we want to reduce the information show to the end user. 
// Originally, the entire array with duplicate items was show for EVERY occurrence; @@ -137,27 +156,34 @@ func (result *ValidationResultFormat) FormatItemsMustBeUniqueError(flags utils.V } } -func FormatSchemaErrors(schemaErrors []gojsonschema.ResultError, flags utils.ValidateCommandFlags, format string) (formattedSchemaErrors string) { +func FormatSchemaErrors(output io.Writer, schemaErrors []gojsonschema.ResultError, flags utils.ValidateCommandFlags, format string) (formattedSchemaErrors string) { - getLogger().Infof(MSG_INFO_FORMATTING_ERROR_RESULTS, format) - switch format { - case FORMAT_JSON: - formattedSchemaErrors = FormatSchemaErrorsJson(schemaErrors, utils.GlobalFlags.ValidateFlags) - case FORMAT_TEXT: - formattedSchemaErrors = FormatSchemaErrorsText(schemaErrors, utils.GlobalFlags.ValidateFlags) - default: - getLogger().Warningf(MSG_WARN_INVALID_FORMAT, format, FORMAT_TEXT) - formattedSchemaErrors = FormatSchemaErrorsText(schemaErrors, utils.GlobalFlags.ValidateFlags) + if lenErrs := len(schemaErrors); lenErrs > 0 { + getLogger().Infof(MSG_INFO_SCHEMA_ERRORS_DETECTED, lenErrs) + getLogger().Infof(MSG_INFO_FORMATTING_ERROR_RESULTS, format) + switch format { + case FORMAT_JSON: + DisplaySchemaErrorsJson(output, schemaErrors, utils.GlobalFlags.ValidateFlags) + case FORMAT_TEXT: + DisplaySchemaErrorsText(output, schemaErrors, utils.GlobalFlags.ValidateFlags) + case FORMAT_CSV: + DisplaySchemaErrorsCsv(output, schemaErrors, utils.GlobalFlags.ValidateFlags) + default: + getLogger().Warningf(MSG_WARN_INVALID_FORMAT, format, FORMAT_TEXT) + DisplaySchemaErrorsText(output, schemaErrors, utils.GlobalFlags.ValidateFlags) + fmt.Fprintf(output, "%s", formattedSchemaErrors) + } } + return } -// Custom formatting based upon possible JSON schema error types -// the custom formatting handlers SHOULD adjust the fields/keys and their values within the `resultMap` +// Custom mapping of schema error results (for formatting) based upon possible JSON schema error types +// the 
custom mapping handlers SHOULD adjust the fields/keys and their values within the `resultMap` // for the respective errorResult being operated on. -func formatSchemaErrorTypes(resultError gojsonschema.ResultError, flags utils.ValidateCommandFlags) (formattedResult string) { +func mapSchemaErrorResult(resultError gojsonschema.ResultError, flags utils.ValidateCommandFlags) (validationErrorResult *ValidationErrorResult) { - validationErrorResult := NewValidationErrResult(resultError) + validationErrorResult = NewValidationErrorResult(resultError) // The cases below represent the complete set of typed errors possible. // Most are commented out as placeholder for future custom format methods. @@ -181,7 +207,7 @@ func formatSchemaErrorTypes(resultError gojsonschema.ResultError, flags utils.Va // case *gojsonschema.InvalidPropertyPatternError: // case *gojsonschema.InvalidTypeError: case *gojsonschema.ItemsMustBeUniqueError: - validationErrorResult.FormatItemsMustBeUniqueError(flags) + validationErrorResult.MapItemsMustBeUniqueError(flags) // case *gojsonschema.MissingDependencyError: // case *gojsonschema.MultipleOfError: // case *gojsonschema.NumberAllOfError: @@ -197,13 +223,13 @@ func formatSchemaErrorTypes(resultError gojsonschema.ResultError, flags utils.Va // case *gojsonschema.StringLengthLTEError: default: getLogger().Debugf("default formatting: ResultError Type: [%v]", errorType) - validationErrorResult.Format(flags) + validationErrorResult.MapResultError(flags) } - return validationErrorResult.formatResultMap(flags) + return } -func (result *ValidationResultFormat) formatResultMap(flags utils.ValidateCommandFlags) string { +func (result *ValidationErrorResult) formatResultMap(flags utils.ValidateCommandFlags) string { // format information on the failing "value" (details) with proper JSON indenting var formattedResult string var errFormatting error @@ -227,15 +253,16 @@ func (result *ValidationResultFormat) formatResultMap(flags utils.ValidateComman return 
formattedResult } -func FormatSchemaErrorsJson(errs []gojsonschema.ResultError, flags utils.ValidateCommandFlags) string { +func DisplaySchemaErrorsJson(output io.Writer, errs []gojsonschema.ResultError, flags utils.ValidateCommandFlags) { + getLogger().Enter() + defer getLogger().Exit() + var sb strings.Builder - lenErrs := len(errs) - if lenErrs > 0 { - getLogger().Infof(MSG_INFO_SCHEMA_ERRORS_DETECTED, lenErrs) - errLimit := flags.MaxNumErrors + if lenErrs := len(errs); lenErrs > 0 { // If we have more errors than the (default or user set) limit; notify user + errLimit := flags.MaxNumErrors if lenErrs > errLimit { // notify users more errors exist getLogger().Infof(MSG_INFO_TOO_MANY_ERRORS, errLimit, len(errs)) @@ -251,11 +278,12 @@ func FormatSchemaErrorsJson(errs []gojsonschema.ResultError, flags utils.Validat } // add to the result errors - schemaErrorText := formatSchemaErrorTypes(resultError, flags) + validationErrorResult := mapSchemaErrorResult(resultError, flags) + formattedResult := validationErrorResult.formatResultMap(flags) // NOTE: we must add the prefix (indent) ourselves // see issue: https://github.com/golang/go/issues/49261 sb.WriteString(ERROR_DETAIL_JSON_DEFAULT_PREFIX) - sb.WriteString(schemaErrorText) + sb.WriteString(formattedResult) if i < (lenErrs-1) && i < (errLimit-1) { sb.WriteString(JSON_ARRAY_ITEM_SEP) @@ -266,19 +294,22 @@ func FormatSchemaErrorsJson(errs []gojsonschema.ResultError, flags utils.Validat sb.WriteString(JSON_ARRAY_END) } - return sb.String() + // Note: JSON data files MUST end in a newline as this is a POSIX standard + fmt.Fprintf(output, "%s\n", sb.String()) } -func FormatSchemaErrorsText(errs []gojsonschema.ResultError, flags utils.ValidateCommandFlags) string { +func DisplaySchemaErrorsText(output io.Writer, errs []gojsonschema.ResultError, flags utils.ValidateCommandFlags) { + getLogger().Enter() + defer getLogger().Exit() + var sb strings.Builder var lineOutput string - lenErrs := len(errs) - if lenErrs > 0 { -
getLogger().Infof(MSG_INFO_SCHEMA_ERRORS_DETECTED, lenErrs) - errLimit := utils.GlobalFlags.ValidateFlags.MaxNumErrors + + if lenErrs := len(errs); lenErrs > 0 { var errorIndex string // If we have more errors than the (default or user set) limit; notify user + errLimit := flags.MaxNumErrors if lenErrs > errLimit { // notify users more errors exist getLogger().Infof(MSG_INFO_TOO_MANY_ERRORS, errLimit, len(errs)) @@ -295,12 +326,73 @@ func FormatSchemaErrorsText(errs []gojsonschema.ResultError, flags utils.Validat errorIndex = strconv.Itoa(i + 1) // emit formatted error result - formattedResult := formatSchemaErrorTypes(resultError, utils.GlobalFlags.ValidateFlags) + validationErrorResult := mapSchemaErrorResult(resultError, flags) + formattedResult := validationErrorResult.formatResultMap(flags) + // NOTE: we must add the prefix (indent) ourselves // see issue: https://github.com/golang/go/issues/49261 - lineOutput = fmt.Sprintf("\n%v. %s", errorIndex, formattedResult) + lineOutput = fmt.Sprintf("%v. 
%s\n", errorIndex, formattedResult) sb.WriteString(lineOutput) } } - return sb.String() + + fmt.Fprintf(output, "%s", sb.String()) +} + +func DisplaySchemaErrorsCsv(output io.Writer, errs []gojsonschema.ResultError, flags utils.ValidateCommandFlags) { + getLogger().Enter() + defer getLogger().Exit() + + var currentRow []string + + w := csv.NewWriter(output) + defer w.Flush() + + // Emit title row + if err := w.Write(VALIDATION_ERROR_TITLES); err != nil { + _ = getLogger().Errorf("error writing to output (%v): %s", LICENSE_LIST_TITLES_LICENSE_CHOICE, err) + return + } + + if lenErrs := len(errs); lenErrs > 0 { + + // If we have more errors than the (default or user set) limit; notify user + errLimit := flags.MaxNumErrors + if lenErrs > errLimit { + // notify users more errors exist + getLogger().Infof(MSG_INFO_TOO_MANY_ERRORS, errLimit, len(errs)) + } + + for i, resultError := range errs { + currentRow = nil + + // short-circuit if too many errors (i.e., using the error limit flag value) + if i == errLimit { + break + } + + // emit formatted error result + validationErrorResult := mapSchemaErrorResult(resultError, flags) + validationErrorResult.formatResultMap(flags) + + // Each row will contain the type, field, context and description of a schema validation error + datatype, _ := validationErrorResult.resultMap.Get(ERROR_DETAIL_KEY_DATA_TYPE) + field, _ := validationErrorResult.resultMap.Get(ERROR_DETAIL_KEY_FIELD) + context, _ := validationErrorResult.resultMap.Get(ERROR_DETAIL_KEY_CONTEXT) + description, _ := validationErrorResult.resultMap.Get(ERROR_DETAIL_KEY_VALUE_DESCRIPTION) + + currentRow = append(currentRow, + fmt.Sprintf("%v", datatype), + fmt.Sprintf("%v", field), + fmt.Sprintf("%v", context), + fmt.Sprintf("%v", description), + ) + + if errWrite := w.Write(currentRow); errWrite != nil { + _ = getLogger().Errorf("error writing to output (%v): %s", currentRow, errWrite) + return + } + + } + } } diff --git a/cmd/validate_test.go b/cmd/validate_test.go index 3133073a..2af5341e 100644
--- a/cmd/validate_test.go +++ b/cmd/validate_test.go @@ -59,9 +59,10 @@ func innerValidateError(t *testing.T, filename string, variant string, format st // Invoke the actual validate function var isValid bool + var outputBuffer bytes.Buffer // TODO: support additional tests on output buffer (e.g., format==valid JSON) - isValid, document, schemaErrors, _, actualError = innerValidateErrorBuffered( + isValid, document, schemaErrors, outputBuffer, actualError = innerValidateErrorBuffered( t, utils.GlobalFlags.PersistentFlags, utils.GlobalFlags.ValidateFlags, @@ -94,6 +95,15 @@ func innerValidateError(t *testing.T, filename string, variant string, format st t.Errorf("Input file invalid (%t); expected valid (no error)", isValid) } + // Assure it is valid JSON output + if format == FORMAT_JSON { + if !utils.IsValidJsonRaw(outputBuffer.Bytes()) { + err := getLogger().Errorf("output did not contain valid format data; expected: `%s`", FORMAT_JSON) + t.Error(err.Error()) + t.Logf("%s", outputBuffer.String()) + return + } + } return } @@ -309,7 +319,23 @@ func TestValidateCdx14ErrorResultsFormatIriReferencesText(t *testing.T) { &InvalidSBOMError{}) } +func TestValidateCdx14ErrorResultsUniqueComponentsCsv(t *testing.T) { + innerValidateError(t, + TEST_CDX_1_4_VALIDATE_ERR_COMPONENTS_UNIQUE, + SCHEMA_VARIANT_NONE, + FORMAT_CSV, + &InvalidSBOMError{}) +} + // TODO: add additional checks on the buffered output +func TestValidateCdx14ErrorResultsFormatIriReferencesCsv(t *testing.T) { + innerValidateError(t, + TEST_CDX_1_4_VALIDATE_ERR_FORMAT_IRI_REFERENCE, + SCHEMA_VARIANT_NONE, + FORMAT_CSV, + &InvalidSBOMError{}) +} + func TestValidateCdx14ErrorResultsUniqueComponentsJson(t *testing.T) { var EXPECTED_ERROR_NUM = 2 var EXPECTED_ERROR_CONTEXT = "(root).components" @@ -318,11 +344,9 @@ func TestValidateCdx14ErrorResultsUniqueComponentsJson(t *testing.T) { SCHEMA_VARIANT_NONE, FORMAT_JSON, &InvalidSBOMError{}) - //output, _ := log.FormatIndentedInterfaceAsJson(schemaErrors, " ", " 
") if len(schemaErrors) != EXPECTED_ERROR_NUM { t.Errorf("invalid schema error count: expected `%v`; actual: `%v`)", EXPECTED_ERROR_NUM, len(schemaErrors)) - //fmt.Printf("schemaErrors:\n %s", output) } if schemaErrors[0].Context().String() != EXPECTED_ERROR_CONTEXT { @@ -340,14 +364,12 @@ func TestValidateCdx14ErrorResultsFormatIriReferencesJson(t *testing.T) { FORMAT_JSON, &InvalidSBOMError{}) - //output, _ := log.FormatIndentedInterfaceAsJson(schemaErrors, " ", " ") - if len(schemaErrors) != EXPECTED_ERROR_NUM { t.Errorf("invalid schema error count: expected `%v`; actual: `%v`)", EXPECTED_ERROR_NUM, len(schemaErrors)) - //fmt.Printf("schemaErrors:\n %s", output) } if schemaErrors[0].Context().String() != EXPECTED_ERROR_CONTEXT { t.Errorf("invalid schema error context: expected `%v`; actual: `%v`)", EXPECTED_ERROR_CONTEXT, schemaErrors[0].Context().String()) } + }