Skip to content

SBOM parsing improvements. #339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 30 additions & 7 deletions internal/sbom/cyclonedx.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package sbom

import (
"errors"
"fmt"
"io"
"path/filepath"
Expand All @@ -11,10 +12,21 @@ import (

// CycloneDX is a stateless type whose methods (enumeratePackages, GetPackages)
// read packages out of CycloneDX SBOM documents.
type CycloneDX struct{}

// cyclonedxType pairs a CycloneDX file format with a short label so that a
// failed decode attempt can be reported by name.
type cyclonedxType struct {
// name is the label inserted into "failed trying %s" error messages.
name string
// bomType is the format value handed to cyclonedx.NewBOMDecoder.
bomType cyclonedx.BOMFileFormat
}

// cycloneDXTypes lists the CycloneDX encodings that GetPackages attempts, in
// slice order (JSON first, then XML).
// NOTE(review): this span is taken from a rendered diff, so the earlier
// []cyclonedx.BOMFileFormat literal and the newer []cyclonedxType literal both
// appear below; confirm against the merged file before relying on it.
var (
cycloneDXTypes = []cyclonedx.BOMFileFormat{
cyclonedx.BOMFileFormatJSON,
cyclonedx.BOMFileFormatXML,
cycloneDXTypes = []cyclonedxType{
{
name: "json",
bomType: cyclonedx.BOMFileFormatJSON,
},
{
name: "xml",
bomType: cyclonedx.BOMFileFormatXML,
},
}
)

Expand Down Expand Up @@ -77,19 +89,30 @@ func (c *CycloneDX) enumeratePackages(bom *cyclonedx.BOM, callback func(Identifi
}

// GetPackages decodes r as a CycloneDX document, trying every entry of
// cycloneDXTypes in order, and hands the first accepted document to
// enumeratePackages together with callback.
//
// r is rewound to its start before each attempt; if that Seek fails the
// wrapped error is returned immediately. A decode is accepted only when it
// succeeds and either BOMFormat equals "CycloneDX" or XMLNS starts with the
// CycloneDX schema URL. When no format is accepted, the per-format failures
// are returned inside an InvalidFormatError.
//
// NOTE(review): this span is taken from a rendered diff, so superseded lines
// (the decoder built from formatType itself, the single combined condition,
// the ErrInvalidFormat return) appear next to their replacements.
func (c *CycloneDX) GetPackages(r io.ReadSeeker, callback func(Identifier) error) error {
//nolint:prealloc // Not sure how many there will be in advance.
var errs []error
// NOTE(review): bom is declared once and reused by every decode attempt;
// whether a failed Decode can leave partial data in it is not visible
// here - confirm against the decoder's behaviour.
var bom cyclonedx.BOM

for _, formatType := range cycloneDXTypes {
// Every attempt must read the input from the beginning.
_, err := r.Seek(0, io.SeekStart)
if err != nil {
return fmt.Errorf("failed to seek to start of file: %w", err)
}
decoder := cyclonedx.NewBOMDecoder(r, formatType)
decoder := cyclonedx.NewBOMDecoder(r, formatType.bomType)
err = decoder.Decode(&bom)
if err == nil && (bom.BOMFormat == "CycloneDX" || strings.HasPrefix(bom.XMLNS, "http://cyclonedx.org/schema/bom")) {
return c.enumeratePackages(&bom, callback)
if err == nil {
if bom.BOMFormat == "CycloneDX" || strings.HasPrefix(bom.XMLNS, "http://cyclonedx.org/schema/bom") {
return c.enumeratePackages(&bom, callback)
} else {
// The decode succeeded but the document does not identify itself as
// CycloneDX, so record that as this format's failure.
err = errors.New("invalid BOMFormat")
}
}

// Keep the failure, labelled with the format name, for the final report.
errs = append(errs, fmt.Errorf("failed trying %s: %w", formatType.name, err))
}

return ErrInvalidFormat
return InvalidFormatError{
msg: "failed to parse CycloneDX",
errs: errs,
}
}
21 changes: 16 additions & 5 deletions internal/sbom/sbom.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package sbom

import (
"errors"
"fmt"
"io"
"strings"
)

// Identifier is the identifier extracted from the SBOM.
Expand All @@ -18,13 +19,23 @@ type SBOMReader interface {
GetPackages(io.ReadSeeker, func(Identifier) error) error
}

var (
ErrInvalidFormat = errors.New("invalid format")
)

var (
Providers = []SBOMReader{
&SPDX{},
&CycloneDX{},
}
)

// InvalidFormatError reports that an SBOM could not be parsed by any of the
// encodings attempted for one SBOM standard. It carries a summary message and
// the individual error produced by each attempt.
type InvalidFormatError struct {
	// msg is the summary line, e.g. "failed to parse SPDX".
	msg string
	// errs holds one error per attempted encoding, in the order they were tried.
	errs []error
}

// Error returns msg followed by a colon and each collected error on its own
// tab-indented line. When no errors were collected it returns msg alone, so
// the message never ends in a dangling ":\n".
func (e InvalidFormatError) Error() string {
	if len(e.errs) == 0 {
		return e.msg
	}

	// The loop variable is deliberately not named e, to avoid shadowing the
	// receiver.
	lines := make([]string, 0, len(e.errs))
	for _, cause := range e.errs {
		lines = append(lines, "\t"+cause.Error())
	}

	return fmt.Sprintf("%s:\n%s", e.msg, strings.Join(lines, "\n"))
}
32 changes: 26 additions & 6 deletions internal/sbom/spdx.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,25 @@ import (
// SPDX is a stateless type whose methods (enumeratePackages, GetPackages)
// read packages out of SPDX SBOM documents.
type SPDX struct{}
// spdxLoader is the function signature shared by the SPDX 2.3 loaders stored
// in spdxLoaders: it reads a document from an io.Reader and returns the parsed
// v2_3.Document or an error.
type spdxLoader func(io.Reader) (*v2_3.Document, error)

// loader pairs an SPDX loading function with a short label so that a failed
// load attempt can be reported by name.
type loader struct {
// name is the label inserted into "failed trying %s" error messages.
name string
// loader is the function invoked on the input to parse it.
loader spdxLoader
}

// spdxLoaders lists the SPDX 2.3 loaders that GetPackages attempts, in slice
// order (JSON, then RDF, then tag-value).
// NOTE(review): this span is taken from a rendered diff, so the earlier
// []spdxLoader literal and the newer []loader literal both appear below;
// confirm against the merged file before relying on it.
var (
spdxLoaders = []spdxLoader{
spdx_json.Load2_3,
rdfloader.Load2_3,
tvloader.Load2_3,
spdxLoaders = []loader{
{
name: "json",
loader: spdx_json.Load2_3,
},
{
name: "rdf",
loader: rdfloader.Load2_3,
},
{
name: "tv",
loader: tvloader.Load2_3,
},
}
)

Expand Down Expand Up @@ -52,16 +66,22 @@ func (s *SPDX) enumeratePackages(doc *v2_3.Document, callback func(Identifier) e
}

// GetPackages parses r as an SPDX document, trying every entry of spdxLoaders
// in order, and hands the first successfully loaded document to
// enumeratePackages together with callback.
//
// r is rewound to its start before each attempt; if that Seek fails the
// wrapped error is returned immediately. When every loader fails, the
// per-loader failures are returned inside an InvalidFormatError.
//
// NOTE(review): this span is taken from a rendered diff, so superseded lines
// (the direct loader(r) call, the ErrInvalidFormat return) appear next to
// their replacements.
func (s *SPDX) GetPackages(r io.ReadSeeker, callback func(Identifier) error) error {
//nolint:prealloc // Not sure how many there will be in advance.
var errs []error
// Note: the loop variable shadows the package-level type named loader.
for _, loader := range spdxLoaders {
// Every attempt must read the input from the beginning.
_, err := r.Seek(0, io.SeekStart)
if err != nil {
return fmt.Errorf("failed to seek to start of file: %w", err)
}
doc, err := loader(r)
doc, err := loader.loader(r)
if err == nil {
return s.enumeratePackages(doc, callback)
}
// Keep the failure, labelled with the loader name, for the final report.
errs = append(errs, fmt.Errorf("failed trying %s: %w", loader.name, err))
}

return ErrInvalidFormat
return InvalidFormatError{
msg: "failed to parse SPDX",
errs: errs,
}
}
27 changes: 21 additions & 6 deletions pkg/osvscanner/osvscanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func scanDir(r *output.Reporter, query *osv.BatchedQuery, dir string, skipGit bo
// No need to check for error
// If scan fails, it means it isn't a valid SBOM file,
// so just move onto the next file
_ = scanSBOMFile(r, query, path)
_ = scanSBOMFile(r, query, path, true)
}

if !root && !recursive && info.IsDir() {
Expand Down Expand Up @@ -221,10 +221,15 @@ func scanLockfile(r *output.Reporter, query *osv.BatchedQuery, path string, pars

// scanSBOMFile will load, identify, and parse the SBOM path passed in, and add the dependencies specified
// within to `query`
func scanSBOMFile(r *output.Reporter, query *osv.BatchedQuery, path string) error {
func scanSBOMFile(r *output.Reporter, query *osv.BatchedQuery, path string, fromFSScan bool) error {
var errs []error
for _, provider := range sbom.Providers {
if !provider.MatchesRecognizedFileNames(path) {
// Skip if filename is not usually a sbom file of this format
if fromFSScan && !provider.MatchesRecognizedFileNames(path) {
// Skip if filename is not usually a sbom file of this format.
// Only do this if this is being done in a filesystem scanning context, where we need to be
// careful about spending too much time attempting to parse unrelated files.
// If this is coming from an explicit scan argument, be more relaxed here since it's common for
// filenames to not conform to expected filename standards.
continue
}

Expand Down Expand Up @@ -265,13 +270,23 @@ func scanSBOMFile(r *output.Reporter, query *osv.BatchedQuery, path string) erro
return nil
}

if errors.Is(err, sbom.ErrInvalidFormat) {
var formatErr sbom.InvalidFormatError
if errors.As(err, &formatErr) {
errs = append(errs, err)
continue
}

return err
}

// Don't log these errors if we're coming from a FS scan, since it can get very noisy.
if !fromFSScan {
r.PrintText("Failed to parse SBOM using all supported formats:\n")
for _, err := range errs {
r.PrintText(err.Error() + "\n")
}
}

return nil
}

Expand Down Expand Up @@ -485,7 +500,7 @@ func DoScan(actions ScannerActions, r *output.Reporter) (models.VulnerabilityRes
if err != nil {
return models.VulnerabilityResults{}, fmt.Errorf("failed to resolved path with error %w", err)
}
err = scanSBOMFile(r, &query, sbomElem)
err = scanSBOMFile(r, &query, sbomElem, false)
if err != nil {
return models.VulnerabilityResults{}, err
}
Expand Down