From 3c2a3a4d653060b97ee07d6cd4d274a94129ae04 Mon Sep 17 00:00:00 2001 From: Yuan Gong Date: Wed, 5 Jan 2022 06:28:33 +0000 Subject: [PATCH 1/8] feat(v2/csv): use module dir as find license boundary --- csv.go | 40 +++++--------------------------------- licenses/find.go | 47 ++++++++++++++++++++------------------------- licenses/library.go | 8 ++++++-- 3 files changed, 32 insertions(+), 63 deletions(-) diff --git a/csv.go b/csv.go index c3fa44a..14f1622 100644 --- a/csv.go +++ b/csv.go @@ -18,7 +18,6 @@ import ( "context" "encoding/csv" "os" - "strings" "github.com/golang/glog" "github.com/google/go-licenses/licenses" @@ -56,41 +55,12 @@ func csvMain(_ *cobra.Command, args []string) error { } for _, lib := range libs { licenseURL := "Unknown" - licenseName := "Unknown" - if lib.LicensePath != "" { - // Find a URL for the license file, based on the URL of a remote for the Git repository. - var errs []string - repo, err := licenses.FindGitRepo(lib.LicensePath) - if err != nil { - // Can't find Git repo (possibly a Go Module?) - derive URL from lib name instead. - lURL, err := lib.FileURL(lib.LicensePath) - if err != nil { - errs = append(errs, err.Error()) - } else { - licenseURL = lURL.String() - } - } else { - for _, remote := range gitRemotes { - url, err := repo.FileURL(lib.LicensePath, remote) - if err != nil { - errs = append(errs, err.Error()) - continue - } - licenseURL = url.String() - break - } - } - if licenseURL == "Unknown" { - glog.Errorf("Error discovering URL for %q:\n- %s", lib.LicensePath, strings.Join(errs, "\n- ")) - } - licenseName, _, err = classifier.Identify(lib.LicensePath) - if err != nil { - glog.Errorf("Error identifying license in %q: %v", lib.LicensePath, err) - licenseName = "Unknown" - } + licenseName, _, err := classifier.Identify(lib.LicensePath) + if err != nil { + glog.Errorf("Error identifying license in %q: %v", lib.LicensePath, err) + licenseName = "Unknown" } - // Remove the "*/vendor/" prefix from the library name for conciseness. - if err := writer.Write([]string{unvendor(lib.Name()), licenseURL, licenseName}); err != nil { + if err := writer.Write([]string{lib.Name(), licenseURL, licenseName}); err != nil { return err } } diff --git a/licenses/find.go b/licenses/find.go index 6bef1cf..269f545 100644 --- a/licenses/find.go +++ b/licenses/find.go @@ -16,30 +16,34 @@ package licenses import ( "fmt" - "go/build" "io/ioutil" "path/filepath" "regexp" + "strings" ) var ( licenseRegexp = regexp.MustCompile(`^(?i)(LICEN(S|C)E|COPYING|README|NOTICE).*$`) - srcDirRegexps = func() []*regexp.Regexp { - var rs []*regexp.Regexp - for _, s := range build.Default.SrcDirs() { - rs = append(rs, regexp.MustCompile("^"+regexp.QuoteMeta(s)+"$")) - } - return rs - }() - vendorRegexp = regexp.MustCompile(`.+/vendor(/)?$`) ) // Find returns the file path of the license for this package. -func Find(dir string, classifier Classifier) (string, error) { - var stopAt []*regexp.Regexp - stopAt = append(stopAt, srcDirRegexps...) - stopAt = append(stopAt, vendorRegexp) - return findUpwards(dir, licenseRegexp, stopAt, func(path string) bool { +// +// dir is path of the directory where we want to find a license. +// rootDir is path of the module containing this package. Find will not search out of the +// rootDir. +func Find(dir string, rootDir string, classifier Classifier) (string, error) { + dir, err := filepath.Abs(dir) + if err != nil { + return "", err + } + rootDir, err = filepath.Abs(rootDir) + if err != nil { + return "", err + } + if !strings.HasPrefix(dir, rootDir) { + return "", fmt.Errorf("licenses.Find: rootDir %s should contain dir %s", rootDir, dir) + } + return findUpwards(dir, licenseRegexp, rootDir, func(path string) bool { // TODO(RJPercival): Return license details if _, _, err := classifier.Identify(path); err != nil { return false @@ -48,15 +52,15 @@ func Find(dir string, classifier Classifier) (string, error) { }) } -func findUpwards(dir string, r *regexp.Regexp, stopAt []*regexp.Regexp, predicate func(path string) bool) (string, error) { +func findUpwards(dir string, r *regexp.Regexp, stopAt string, predicate func(path string) bool) (string, error) { // Dir must be made absolute for reliable matching with stopAt regexps dir, err := filepath.Abs(dir) if err != nil { return "", err } start := dir - // Stop once dir matches a stopAt regexp or dir is the filesystem root - for !matchAny(stopAt, dir) { + // Stop once we go out of the stopAt dir. + for strings.HasPrefix(dir, stopAt) { dirContents, err := ioutil.ReadDir(dir) if err != nil { return "", err @@ -79,12 +83,3 @@ func findUpwards(dir string, r *regexp.Regexp, stopAt []*regexp.Regexp, predicat } return "", fmt.Errorf("no file/directory matching regexp %q found for %s", r, start) } - -func matchAny(patterns []*regexp.Regexp, s string) bool { - for _, p := range patterns { - if p.MatchString(s) { - return true - } - } - return false -} diff --git a/licenses/library.go b/licenses/library.go index 1ac665b..24b2e2f 100644 --- a/licenses/library.go +++ b/licenses/library.go @@ -68,7 +68,7 @@ func (e PackagesError) Error() string { func Libraries(ctx context.Context, classifier Classifier, importPaths ...string) ([]*Library, error) { cfg := &packages.Config{ Context: ctx, - Mode: packages.NeedImports | packages.NeedDeps | packages.NeedFiles | packages.NeedName, + Mode: packages.NeedImports | packages.NeedDeps | packages.NeedFiles | packages.NeedName | packages.NeedModule, } rootPkgs, err := packages.Load(cfg, importPaths...) @@ -103,7 +103,7 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string // This package is empty - nothing to do. return true } - licensePath, err := Find(pkgDir, classifier) + licensePath, err := Find(pkgDir, p.Module.Dir, classifier) if err != nil { glog.Errorf("Failed to find license for %s: %v", p.PkgPath, err) } @@ -202,6 +202,10 @@ func (l *Library) FileURL(filePath string) (*url.URL, error) { // isStdLib returns true if this package is part of the Go standard library. func isStdLib(pkg *packages.Package) bool { + if pkg.Name == "unsafe" { + // Special case unsafe stdlib, because it does not contain go files. + return true + } if len(pkg.GoFiles) == 0 { return false } From 2bc09b7c33985f9cb58158041c13b3938e2013f1 Mon Sep 17 00:00:00 2001 From: Yuan Gong Date: Wed, 29 Dec 2021 08:24:07 +0000 Subject: [PATCH 2/8] feat(v2/csv): get license remote URLs --- csv.go | 29 +++++++++++------ licenses/find_test.go | 2 +- licenses/git.go | 2 +- licenses/library.go | 70 ++++++++++++++++++++++------------------ licenses/library_test.go | 58 ++++++++++++++++++++++++++++++--- main.go | 15 +++++++++ 6 files changed, 128 insertions(+), 48 deletions(-) diff --git a/csv.go b/csv.go index 14f1622..93befcf 100644 --- a/csv.go +++ b/csv.go @@ -16,7 +16,7 @@ package main import ( "context" - "encoding/csv" + "fmt" "os" "github.com/golang/glog" @@ -42,8 +42,6 @@ func init() { } func csvMain(_ *cobra.Command, args []string) error { - writer := csv.NewWriter(os.Stdout) - classifier, err := licenses.NewClassifier(confidenceThreshold) if err != nil { return err @@ -54,16 +52,27 @@ func csvMain(_ *cobra.Command, args []string) error { return err } for _, lib := range libs { + licenseName := "Unknown" licenseURL := "Unknown" - licenseName, _, err := classifier.Identify(lib.LicensePath) - if err != nil { - glog.Errorf("Error identifying license in %q: %v", lib.LicensePath, err) - licenseName = "Unknown" + if lib.LicensePath != "" { + licenseName, _, err = classifier.Identify(lib.LicensePath) + if err != nil { + glog.Errorf("Error identifying license in %q: %v", lib.LicensePath, err) + licenseName = "Unknown" + } + licenseURL, err = lib.FileURL(context.Background(), lib.LicensePath) + if err != nil { + glog.Warningf("Error discovering license URL: %s", err) + } + if licenseURL == "" { + licenseURL = "Unknown" + } } - if err := writer.Write([]string{lib.Name(), licenseURL, licenseName}); err != nil { + // Adding spaces after each "," makes vscode/terminal recognize the correct license URL. + // Otherwise, if there's no space, vscode interprets the URL as concatenated with the license name. + if _, err := os.Stdout.WriteString(fmt.Sprintf("%s, %s, %s\n", lib.Name(), licenseURL, licenseName)); err != nil { return err } } - writer.Flush() - return writer.Error() + return nil } diff --git a/licenses/find_test.go b/licenses/find_test.go index 7fe5c3e..d19a73e 100644 --- a/licenses/find_test.go +++ b/licenses/find_test.go @@ -101,7 +101,7 @@ func TestFind(t *testing.T) { }, } { t.Run(test.desc, func(t *testing.T) { - licensePath, err := Find(test.dir, classifier) + licensePath, err := Find(test.dir, "./testdata", classifier) if err != nil || licensePath != test.wantLicensePath { t.Fatalf("Find(%q) = (%#v, %q), want (%q, nil)", test.dir, licensePath, err, test.wantLicensePath) } diff --git a/licenses/git.go b/licenses/git.go index e3c619e..b8e60b1 100644 --- a/licenses/git.go +++ b/licenses/git.go @@ -46,7 +46,7 @@ type GitRepo struct { // FindGitRepo finds the Git repository that contains the specified filePath // by searching upwards through the directory tree for a ".git" directory. func FindGitRepo(filePath string) (*GitRepo, error) { - path, err := findUpwards(filepath.Dir(filePath), gitRegexp, srcDirRegexps, nil) + path, err := findUpwards(filepath.Dir(filePath), gitRegexp, "/", nil) if err != nil { return nil, err } diff --git a/licenses/library.go b/licenses/library.go index 24b2e2f..1e7429a 100644 --- a/licenses/library.go +++ b/licenses/library.go @@ -18,24 +18,16 @@ import ( "context" "fmt" "go/build" - "net/url" - "path" "path/filepath" "sort" "strings" + "time" "github.com/golang/glog" + "github.com/google/go-licenses/internal/third_party/pkgsite/source" "golang.org/x/tools/go/packages" ) -var ( - // TODO(RJPercival): Support replacing "master" with Go Module version - repoPathPrefixes = map[string]string{ - "github.com": "blob/master/", - "bitbucket.org": "src/master/", - } -) - // Library is a collection of packages covered by the same license file. type Library struct { // LicensePath is the path of the file containing the library's license. @@ -43,6 +35,8 @@ type Library struct { // Packages contains import paths for Go packages in this library. // It may not be the complete set of all packages in the library. Packages []string + // Parent go module. + Module *packages.Module } // PackagesError aggregates all Packages[].Errors into a single error. @@ -124,6 +118,7 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string for _, p := range pkgs { libraries = append(libraries, &Library{ Packages: []string{p.PkgPath}, + Module: p.Module, }) } continue @@ -133,6 +128,10 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string } for _, pkg := range pkgs { lib.Packages = append(lib.Packages, pkg.PkgPath) + if lib.Module == nil { + // All the sub packages should belong to the same module. + lib.Module = pkg.Module + } } libraries = append(libraries, lib) } @@ -173,31 +172,40 @@ func (l *Library) String() string { return l.Name() } -// FileURL attempts to determine the URL for a file in this library. -// This only works for certain supported package prefixes, such as github.com, -// bitbucket.org and googlesource.com. Prefer GitRepo.FileURL() if possible. -func (l *Library) FileURL(filePath string) (*url.URL, error) { - relFilePath, err := filepath.Rel(filepath.Dir(l.LicensePath), filePath) - if err != nil { - return nil, err +// FileURL attempts to determine the URL for a file in this library using +// go module name and version. +func (l *Library) FileURL(ctx context.Context, filePath string) (string, error) { + if l == nil { + return "", fmt.Errorf("library is nil") } - nameParts := strings.SplitN(l.Name(), "/", 4) - if len(nameParts) < 3 { - return nil, fmt.Errorf("cannot determine URL for %q package", l.Name()) + wrap := func(err error) error { + return fmt.Errorf("getting file URL in library %s: %w", l.Name(), err) } - host, user, project := nameParts[0], nameParts[1], nameParts[2] - pathPrefix, ok := repoPathPrefixes[host] - if !ok { - return nil, fmt.Errorf("unsupported package host %q for %q", host, l.Name()) + m := l.Module + if m == nil { + return "", wrap(fmt.Errorf("empty go module info")) } - if len(nameParts) == 4 { - pathPrefix = path.Join(pathPrefix, nameParts[3]) + if m.Dir == "" { + return "", wrap(fmt.Errorf("empty go module dir")) + } + client := source.NewClient(time.Second * 20) + ver := m.Version + if ver == "" { + // This always happens for the module in development. + ver = "master" + glog.Warningf("module %s has empty version, defaults to master. The file URL may be incorrect. Please verify!", m.Path) + } + remote, err := source.ModuleInfo(ctx, client, m.Path, ver) + if err != nil { + return "", wrap(err) + } + relativePath, err := filepath.Rel(m.Dir, filePath) + if err != nil { + return "", wrap(err) } - return &url.URL{ - Scheme: "https", - Host: host, - Path: path.Join(user, project, pathPrefix, relFilePath), - }, nil + // TODO: there are still rare cases this may result in an incorrect URL. + // https://github.com/google/go-licenses/issues/73#issuecomment-1005587408 + return remote.FileURL(relativePath), nil } // isStdLib returns true if this package is part of the Go standard library. diff --git a/licenses/library_test.go b/licenses/library_test.go index e082fe3..c3f5c2b 100644 --- a/licenses/library_test.go +++ b/licenses/library_test.go @@ -21,6 +21,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "golang.org/x/tools/go/packages" ) func TestLibraries(t *testing.T) { @@ -154,9 +155,14 @@ func TestLibraryFileURL(t *testing.T) { "github.com/google/trillian/crypto", }, LicensePath: "/go/src/github.com/google/trillian/LICENSE", + Module: &packages.Module{ + Path: "github.com/google/trillian", + Dir: "/go/src/github.com/google/trillian", + Version: "v1.2.3", + }, }, path: "/go/src/github.com/google/trillian/foo/README.md", - wantURL: "https://github.com/google/trillian/blob/master/foo/README.md", + wantURL: "https://github.com/google/trillian/blob/v1.2.3/foo/README.md", }, { desc: "Library on bitbucket.org", @@ -166,9 +172,14 @@ func TestLibraryFileURL(t *testing.T) { "bitbucket.org/user/project/pkg2", }, LicensePath: "/foo/bar/bitbucket.org/user/project/LICENSE", + Module: &packages.Module{ + Path: "bitbucket.org/user/project", + Dir: "/foo/bar/bitbucket.org/user/project", + Version: "v1.2.3", + }, }, path: "/foo/bar/bitbucket.org/user/project/foo/README.md", - wantURL: "https://bitbucket.org/user/project/src/master/pkg/foo/README.md", + wantURL: "https://bitbucket.org/user/project/src/v1.2.3/foo/README.md", }, { desc: "Library on example.com", @@ -178,19 +189,56 @@ func TestLibraryFileURL(t *testing.T) { "example.com/user/project/pkg2", }, LicensePath: "/foo/bar/example.com/user/project/LICENSE", + Module: &packages.Module{ + Path: "example.com/user/project", + Dir: "/foo/bar/example.com/user/project", + Version: "v1.2.3", + }, }, path: "/foo/bar/example.com/user/project/foo/README.md", - wantErr: true, + wantURL: "https://example.com/user/project/blob/v1.2.3/foo/README.md", + }, + { + desc: "Library without version defaults to master branch", + lib: &Library{ + Packages: []string{ + "github.com/google/trillian", + "github.com/google/trillian/crypto", + }, + LicensePath: "/go/src/github.com/google/trillian/LICENSE", + Module: &packages.Module{ + Path: "github.com/google/trillian", + Dir: "/go/src/github.com/google/trillian", + }, + }, + path: "/go/src/github.com/google/trillian/foo/README.md", + wantURL: "https://github.com/google/trillian/blob/master/foo/README.md", + }, + { + desc: "Library on k8s.io", + lib: &Library{ + Packages: []string{ + "k8s.io/api/core/v1", + }, + LicensePath: "/go/modcache/k8s.io/api/LICENSE", + Module: &packages.Module{ + Path: "k8s.io/api", + Dir: "/go/modcache/k8s.io/api", + Version: "v0.23.1", + }, + }, + path: "/go/modcache/k8s.io/api/LICENSE", + wantURL: "https://github.com/kubernetes/api/blob/v0.23.1/LICENSE", }, } { t.Run(test.desc, func(t *testing.T) { - fileURL, err := test.lib.FileURL(test.path) + fileURL, err := test.lib.FileURL(context.Background(), test.path) if gotErr := err != nil; gotErr != test.wantErr { t.Fatalf("FileURL(%q) = (_, %q), want err? %t", test.path, err, test.wantErr) } else if gotErr { return } - if got, want := fileURL.String(), test.wantURL; got != want { + if got, want := fileURL, test.wantURL; got != want { t.Fatalf("FileURL(%q) = %q, want %q", test.path, got, want) } }) diff --git a/main.go b/main.go index c1b9d30..d4c1c94 100644 --- a/main.go +++ b/main.go @@ -16,6 +16,8 @@ package main import ( "flag" + "fmt" + "os" "strings" "github.com/golang/glog" @@ -32,6 +34,19 @@ var ( ) func init() { + // Change glog default log level to INFO. + // Note glog is not initialized yet, so we can only use fmt for printing + // errors. + err := flag.Set("logtostderr", "true") + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + err = flag.Set("stderrthreshold", "INFO") + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } rootCmd.PersistentFlags().Float64Var(&confidenceThreshold, "confidence_threshold", 0.9, "Minimum confidence required in order to positively identify a license.") } From 7d974272ad460011aadf939afdabf836ead3603e Mon Sep 17 00:00:00 2001 From: Yuan Gong Date: Sun, 23 Jan 2022 03:11:53 +0000 Subject: [PATCH 3/8] chore: apply feedback from @wlynch --- csv.go | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/csv.go b/csv.go index 93befcf..34bf847 100644 --- a/csv.go +++ b/csv.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "os" + "strings" "github.com/golang/glog" "github.com/google/go-licenses/licenses" @@ -55,22 +56,27 @@ func csvMain(_ *cobra.Command, args []string) error { licenseName := "Unknown" licenseURL := "Unknown" if lib.LicensePath != "" { - licenseName, _, err = classifier.Identify(lib.LicensePath) - if err != nil { + name, _, err := classifier.Identify(lib.LicensePath) + if err == nil { + licenseName = name + } else { glog.Errorf("Error identifying license in %q: %v", lib.LicensePath, err) - licenseName = "Unknown" } - licenseURL, err = lib.FileURL(context.Background(), lib.LicensePath) - if err != nil { + url, err := lib.FileURL(context.Background(), lib.LicensePath) + if err == nil { + licenseURL = url + } else { glog.Warningf("Error discovering license URL: %s", err) } - if licenseURL == "" { - licenseURL = "Unknown" - } } - // Adding spaces after each "," makes vscode/terminal recognize the correct license URL. - // Otherwise, if there's no space, vscode interprets the URL as concatenated with the license name. - if _, err := os.Stdout.WriteString(fmt.Sprintf("%s, %s, %s\n", lib.Name(), licenseURL, licenseName)); err != nil { + // Using ", " to join words makes vscode/terminal recognize the + // correct license URL. Otherwise, if there's no space after + // comma, vscode interprets the URL as concatenated with the + // license name after it. + // Also, the extra spaces does not affect csv syntax much, we + // can still copy the csv text and paste into Excel / Google + // Sheets. + if _, err := fmt.Fprintln(os.Stdout, strings.Join([]string{lib.Name(), licenseURL, licenseName}, ", ")); err != nil { return err } } From 60c44d69fe259db3bef345b5f21700fadc469d4c Mon Sep 17 00:00:00 2001 From: Yuan Gong Date: Sun, 23 Jan 2022 08:45:00 +0000 Subject: [PATCH 4/8] test: update golden files --- testdata/modules/cli02/licenses.csv | 36 ++++++++++++------------ testdata/modules/hello01/licenses.csv | 2 +- testdata/modules/replace04/licenses.csv | 4 +-- testdata/modules/vendored03/licenses.csv | 4 +-- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/testdata/modules/cli02/licenses.csv b/testdata/modules/cli02/licenses.csv index 6967feb..4be7bdd 100644 --- a/testdata/modules/cli02/licenses.csv +++ b/testdata/modules/cli02/licenses.csv @@ -1,18 +1,18 @@ -github.com/fsnotify/fsnotify,https://github.com/fsnotify/fsnotify/blob/master/LICENSE,BSD-3-Clause -github.com/google/go-licenses/testdata/modules/cli02,https://github.com/google/go-licenses/blob/master/testdata/modules/cli02/LICENSE,Apache-2.0 -github.com/hashicorp/hcl,https://github.com/hashicorp/hcl/blob/master/LICENSE,MPL-2.0 -github.com/magiconair/properties,https://github.com/magiconair/properties/blob/master/LICENSE.md,BSD-2-Clause -github.com/mitchellh/go-homedir,https://github.com/mitchellh/go-homedir/blob/master/LICENSE,MIT -github.com/mitchellh/mapstructure,https://github.com/mitchellh/mapstructure/blob/master/LICENSE,MIT -github.com/pelletier/go-toml,https://github.com/pelletier/go-toml/blob/master/LICENSE,Apache-2.0 -github.com/spf13/afero,https://github.com/spf13/afero/blob/master/LICENSE.txt,Apache-2.0 -github.com/spf13/cast,https://github.com/spf13/cast/blob/master/LICENSE,MIT -github.com/spf13/cobra,https://github.com/spf13/cobra/blob/master/LICENSE.txt,Apache-2.0 -github.com/spf13/jwalterweatherman,https://github.com/spf13/jwalterweatherman/blob/master/LICENSE,MIT -github.com/spf13/pflag,https://github.com/spf13/pflag/blob/master/LICENSE,BSD-3-Clause -github.com/spf13/viper,https://github.com/spf13/viper/blob/master/LICENSE,MIT -github.com/subosito/gotenv,https://github.com/subosito/gotenv/blob/master/LICENSE,MIT -golang.org/x/sys,Unknown,BSD-3-Clause -golang.org/x/text,Unknown,BSD-3-Clause -gopkg.in/ini.v1,Unknown,Apache-2.0 -gopkg.in/yaml.v2,Unknown,Apache-2.0 +github.com/fsnotify/fsnotify, https://github.com/fsnotify/fsnotify/blob/v1.4.9/LICENSE, BSD-3-Clause +github.com/google/go-licenses/testdata/modules/cli02, https://github.com/google/go-licenses/blob/testdata/modules/cli02/master/testdata/modules/cli02/LICENSE, Apache-2.0 +github.com/hashicorp/hcl, https://github.com/hashicorp/hcl/blob/v1.0.0/LICENSE, MPL-2.0 +github.com/magiconair/properties, https://github.com/magiconair/properties/blob/v1.8.5/LICENSE.md, BSD-2-Clause +github.com/mitchellh/go-homedir, https://github.com/mitchellh/go-homedir/blob/v1.1.0/LICENSE, MIT +github.com/mitchellh/mapstructure, https://github.com/mitchellh/mapstructure/blob/v1.4.1/LICENSE, MIT +github.com/pelletier/go-toml, https://github.com/pelletier/go-toml/blob/v1.9.3/LICENSE, Apache-2.0 +github.com/spf13/afero, https://github.com/spf13/afero/blob/v1.6.0/LICENSE.txt, Apache-2.0 +github.com/spf13/cast, https://github.com/spf13/cast/blob/v1.3.1/LICENSE, MIT +github.com/spf13/cobra, https://github.com/spf13/cobra/blob/v1.1.3/LICENSE.txt, Apache-2.0 +github.com/spf13/jwalterweatherman, https://github.com/spf13/jwalterweatherman/blob/v1.1.0/LICENSE, MIT +github.com/spf13/pflag, https://github.com/spf13/pflag/blob/v1.0.5/LICENSE, BSD-3-Clause +github.com/spf13/viper, https://github.com/spf13/viper/blob/v1.8.0/LICENSE, MIT +github.com/subosito/gotenv, https://github.com/subosito/gotenv/blob/v1.2.0/LICENSE, MIT +golang.org/x/sys, https://cs.opensource.google/go/x/sys/+/977fb726:LICENSE, BSD-3-Clause +golang.org/x/text, https://cs.opensource.google/go/x/text/+/v0.3.5:LICENSE, BSD-3-Clause +gopkg.in/ini.v1, https://github.com/go-ini/ini/blob/v1.62.0/LICENSE, Apache-2.0 +gopkg.in/yaml.v2, https://github.com/go-yaml/yaml/blob/v2.4.0/LICENSE, Apache-2.0 diff --git a/testdata/modules/hello01/licenses.csv b/testdata/modules/hello01/licenses.csv index ad1271f..5be1770 100644 --- a/testdata/modules/hello01/licenses.csv +++ b/testdata/modules/hello01/licenses.csv @@ -1 +1 @@ -github.com/google/go-licenses/testdata/modules/hello01,https://github.com/google/go-licenses/blob/master/testdata/modules/hello01/LICENSE,Apache-2.0 +github.com/google/go-licenses/testdata/modules/hello01, https://github.com/google/go-licenses/blob/testdata/modules/hello01/master/testdata/modules/hello01/LICENSE, Apache-2.0 diff --git a/testdata/modules/replace04/licenses.csv b/testdata/modules/replace04/licenses.csv index 0a51c43..4401e78 100644 --- a/testdata/modules/replace04/licenses.csv +++ b/testdata/modules/replace04/licenses.csv @@ -1,2 +1,2 @@ -github.com/google/go-licenses/testdata/modules/replace04,https://github.com/google/go-licenses/blob/master/testdata/modules/replace04/LICENSE,Apache-2.0 -github.com/mitchellh/go-homedir,https://github.com/mitchellh/go-homedir/blob/master/LICENSE,MIT +github.com/google/go-licenses/testdata/modules/replace04, https://github.com/google/go-licenses/blob/testdata/modules/replace04/master/testdata/modules/replace04/LICENSE, Apache-2.0 +github.com/mitchellh/go-homedir, https://github.com/mitchellh/go-homedir/blob/v1.1.0/LICENSE, MIT diff --git a/testdata/modules/vendored03/licenses.csv b/testdata/modules/vendored03/licenses.csv index 0172a50..48f61c0 100644 --- a/testdata/modules/vendored03/licenses.csv +++ b/testdata/modules/vendored03/licenses.csv @@ -1,2 +1,2 @@ -github.com/google/go-licenses/testdata/modules/vendored03,https://github.com/google/go-licenses/blob/master/testdata/modules/vendored03/LICENSE,Apache-2.0 -github.com/mitchellh/go-homedir,https://github.com/google/go-licenses/blob/master/testdata/modules/vendored03/vendor/github.com/mitchellh/go-homedir/LICENSE,MIT +github.com/google/go-licenses/testdata/modules/vendored03, https://github.com/google/go-licenses/blob/testdata/modules/vendored03/master/testdata/modules/vendored03/LICENSE, Apache-2.0 +github.com/mitchellh/go-homedir, Unknown, MIT From 6b78cc44725f66a72e6e9e3551bc6e70bbae8cce Mon Sep 17 00:00:00 2001 From: Yuan Gong Date: Sun, 23 Jan 2022 08:47:57 +0000 Subject: [PATCH 5/8] feat(v2): handle modules that vendor deps --- internal/third_party/pkgsite/README.md | 1 + .../pkgsite/source/source_patch.go | 33 ++++++++++++ licenses/library.go | 54 ++++++++++++++++--- licenses/library_test.go | 4 +- testdata/modules/cli02/licenses.csv | 2 +- testdata/modules/hello01/licenses.csv | 2 +- testdata/modules/replace04/licenses.csv | 2 +- testdata/modules/vendored03/licenses.csv | 4 +- 8 files changed, 88 insertions(+), 14 deletions(-) create mode 100644 internal/third_party/pkgsite/source/source_patch.go diff --git a/internal/third_party/pkgsite/README.md b/internal/third_party/pkgsite/README.md index 91e63dd..473d33a 100644 --- a/internal/third_party/pkgsite/README.md +++ b/internal/third_party/pkgsite/README.md @@ -16,3 +16,4 @@ Local modifications: pkgsite/internal/version to avoid other dependencies. - For pkgsite/internal/source, switched to use go log package, because glog conflicts with a test dependency that also defines the "v" flag. +- Add a SetCommit method to type ModuleInfo in ./source/source_patch.go, more rationale explained in the method's comments. diff --git a/internal/third_party/pkgsite/source/source_patch.go b/internal/third_party/pkgsite/source/source_patch.go new file mode 100644 index 0000000..3bce5e6 --- /dev/null +++ b/internal/third_party/pkgsite/source/source_patch.go @@ -0,0 +1,33 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package source + +// This file includes all local additions to source package for google/go-licenses use-cases. + +// SetCommit overrides commit to a specified commit. Usually, you should pass your version to +// ModuleInfo(). However, when you do not know the version and just wants a link that points to +// a known commit/branch/tag. You can use this method to directly override the commit like +// info.SetCommit("master"). +// +// Note this is different from directly passing "master" as version to ModuleInfo(), because for +// modules not at the root of a repo, there are conventions that add a module's relative dir in +// front of the version as the actual git tag. For example, for a sub module at ./submod whose +// version is v1.0.1, the actual git tag should be submod/v1.0.1. +func (i *Info) SetCommit(commit string) { + if i == nil { + return + } + i.commit = commit +} diff --git a/licenses/library.go b/licenses/library.go index 1e7429a..c1801e5 100644 --- a/licenses/library.go +++ b/licenses/library.go @@ -132,6 +132,30 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string // All the sub packages should belong to the same module. lib.Module = pkg.Module } + if lib.Module != nil && lib.Module.Path != "" && lib.Module.Dir == "" { + // A known cause is that the module is vendored, so some information is lost. + splits := strings.SplitN(lib.LicensePath, "/vendor/", 2) + if len(splits) != 2 { + glog.Warningf("module %s does not have dir and it's not vendored, cannot discover the license URL. Report to go-licenses developer if you see this.", lib.Module.Path) + } else { + // This is vendored. Handle this known special case. + parentModDir := splits[0] + var parentPkg *packages.Package + for _, rootPkg := range rootPkgs { + if rootPkg.Module != nil && rootPkg.Module.Dir == parentModDir { + parentPkg = rootPkg + break + } + } + if parentPkg == nil { + glog.Warningf("cannot find parent package of vendored module %s", lib.Module.Path) + } else { + // Vendored modules should be commited in the parent module, so it counts as part of the + // parent module. + lib.Module = parentPkg.Module + } + } + } } libraries = append(libraries, lib) } @@ -189,16 +213,32 @@ func (l *Library) FileURL(ctx context.Context, filePath string) (string, error) return "", wrap(fmt.Errorf("empty go module dir")) } client := source.NewClient(time.Second * 20) - ver := m.Version - if ver == "" { - // This always happens for the module in development. - ver = "master" - glog.Warningf("module %s has empty version, defaults to master. The file URL may be incorrect. Please verify!", m.Path) - } - remote, err := source.ModuleInfo(ctx, client, m.Path, ver) + remote, err := source.ModuleInfo(ctx, client, m.Path, m.Version) if err != nil { return "", wrap(err) } + if m.Version == "" { + // This always happens for the module in development. + // Note#1 if we pass version=HEAD to source.ModuleInfo, github tag for modules not at the root + // of the repo will be incorrect, because there's a convention that: + // * I have a module at github.com/google/go-licenses/submod. + // * The module is of version v1.0.0. + // Then the github tag should be submod/v1.0.0. + // In our case, if we pass HEAD as version, the result commit will be submod/HEAD which is incorrect. + // Therefore, to workaround this problem, we directly set the commit after getting module info. + // + // Note#2 repos have different branches as default, some use the + // master branch and some use the main branch. However, HEAD + // always refers to the default branch, so it's better than + // both of master/main when we do not know which branch is default. + // Examples: + // * https://github.com/google/go-licenses/blob/HEAD/LICENSE + // points to latest commit of master branch. + // * https://github.com/google/licenseclassifier/blob/HEAD/LICENSE + // points to latest commit of main branch. + remote.SetCommit("HEAD") + glog.Warningf("module %s has empty version, defaults to HEAD. The license URL may be incorrect. Please verify!", m.Path) + } relativePath, err := filepath.Rel(m.Dir, filePath) if err != nil { return "", wrap(err) diff --git a/licenses/library_test.go b/licenses/library_test.go index c3f5c2b..e2938d3 100644 --- a/licenses/library_test.go +++ b/licenses/library_test.go @@ -199,7 +199,7 @@ func TestLibraryFileURL(t *testing.T) { wantURL: "https://example.com/user/project/blob/v1.2.3/foo/README.md", }, { - desc: "Library without version defaults to master branch", + desc: "Library without version defaults to remote HEAD", lib: &Library{ Packages: []string{ "github.com/google/trillian", @@ -212,7 +212,7 @@ func TestLibraryFileURL(t *testing.T) { }, }, path: "/go/src/github.com/google/trillian/foo/README.md", - wantURL: "https://github.com/google/trillian/blob/master/foo/README.md", + wantURL: "https://github.com/google/trillian/blob/HEAD/foo/README.md", }, { desc: "Library on k8s.io", diff --git a/testdata/modules/cli02/licenses.csv b/testdata/modules/cli02/licenses.csv index 4be7bdd..2d42d4b 100644 --- a/testdata/modules/cli02/licenses.csv +++ b/testdata/modules/cli02/licenses.csv @@ -1,5 +1,5 @@ github.com/fsnotify/fsnotify, https://github.com/fsnotify/fsnotify/blob/v1.4.9/LICENSE, BSD-3-Clause -github.com/google/go-licenses/testdata/modules/cli02, https://github.com/google/go-licenses/blob/testdata/modules/cli02/master/testdata/modules/cli02/LICENSE, Apache-2.0 +github.com/google/go-licenses/testdata/modules/cli02, https://github.com/google/go-licenses/blob/HEAD/testdata/modules/cli02/LICENSE, Apache-2.0 github.com/hashicorp/hcl, https://github.com/hashicorp/hcl/blob/v1.0.0/LICENSE, MPL-2.0 github.com/magiconair/properties, https://github.com/magiconair/properties/blob/v1.8.5/LICENSE.md, BSD-2-Clause github.com/mitchellh/go-homedir, https://github.com/mitchellh/go-homedir/blob/v1.1.0/LICENSE, MIT diff --git a/testdata/modules/hello01/licenses.csv b/testdata/modules/hello01/licenses.csv index 5be1770..4aa9d5e 100644 --- a/testdata/modules/hello01/licenses.csv +++ b/testdata/modules/hello01/licenses.csv @@ -1 +1 @@ -github.com/google/go-licenses/testdata/modules/hello01, https://github.com/google/go-licenses/blob/testdata/modules/hello01/master/testdata/modules/hello01/LICENSE, Apache-2.0 +github.com/google/go-licenses/testdata/modules/hello01, https://github.com/google/go-licenses/blob/HEAD/testdata/modules/hello01/LICENSE, Apache-2.0 diff --git a/testdata/modules/replace04/licenses.csv b/testdata/modules/replace04/licenses.csv index 4401e78..a27f538 100644 --- a/testdata/modules/replace04/licenses.csv +++ b/testdata/modules/replace04/licenses.csv @@ -1,2 +1,2 @@ -github.com/google/go-licenses/testdata/modules/replace04, https://github.com/google/go-licenses/blob/testdata/modules/replace04/master/testdata/modules/replace04/LICENSE, Apache-2.0 +github.com/google/go-licenses/testdata/modules/replace04, https://github.com/google/go-licenses/blob/HEAD/testdata/modules/replace04/LICENSE, Apache-2.0 github.com/mitchellh/go-homedir, https://github.com/mitchellh/go-homedir/blob/v1.1.0/LICENSE, MIT diff --git a/testdata/modules/vendored03/licenses.csv b/testdata/modules/vendored03/licenses.csv index 48f61c0..c2db284 100644 --- a/testdata/modules/vendored03/licenses.csv +++ b/testdata/modules/vendored03/licenses.csv @@ -1,2 +1,2 @@ -github.com/google/go-licenses/testdata/modules/vendored03, https://github.com/google/go-licenses/blob/testdata/modules/vendored03/master/testdata/modules/vendored03/LICENSE, Apache-2.0 -github.com/mitchellh/go-homedir, Unknown, MIT +github.com/google/go-licenses/testdata/modules/vendored03, https://github.com/google/go-licenses/blob/HEAD/testdata/modules/vendored03/LICENSE, Apache-2.0 +github.com/mitchellh/go-homedir, https://github.com/google/go-licenses/blob/HEAD/testdata/modules/vendored03/vendor/github.com/mitchellh/go-homedir/LICENSE, MIT From e57f558ce9840067ac373901969f8db8f853a25b Mon Sep 17 00:00:00 2001 From: Yuan Gong Date: Sun, 23 Jan 2022 08:54:24 +0000 Subject: [PATCH 6/8] feat(v2): handle modules with replace directive --- licenses/library.go | 18 +++---- licenses/library_test.go | 11 ++-- licenses/module.go | 69 +++++++++++++++++++++++++ testdata/modules/replace04/licenses.csv | 2 +- 4 files changed, 84 insertions(+), 16 deletions(-) create mode 100644 licenses/module.go diff --git a/licenses/library.go b/licenses/library.go index c1801e5..2bccc35 100644 --- a/licenses/library.go +++ b/licenses/library.go @@ -36,7 +36,7 @@ type Library struct { // It may not be the complete set of all packages in the library. Packages []string // Parent go module. - Module *packages.Module + module *Module } // PackagesError aggregates all Packages[].Errors into a single error. @@ -118,7 +118,7 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string for _, p := range pkgs { libraries = append(libraries, &Library{ Packages: []string{p.PkgPath}, - Module: p.Module, + module: newModule(p.Module), }) } continue @@ -128,15 +128,15 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string } for _, pkg := range pkgs { lib.Packages = append(lib.Packages, pkg.PkgPath) - if lib.Module == nil { + if lib.module == nil { // All the sub packages should belong to the same module. - lib.Module = pkg.Module + lib.module = newModule(pkg.Module) } - if lib.Module != nil && lib.Module.Path != "" && lib.Module.Dir == "" { + if lib.module != nil && lib.module.Path != "" && lib.module.Dir == "" { // A known cause is that the module is vendored, so some information is lost. splits := strings.SplitN(lib.LicensePath, "/vendor/", 2) if len(splits) != 2 { - glog.Warningf("module %s does not have dir and it's not vendored, cannot discover the license URL. Report to go-licenses developer if you see this.", lib.Module.Path) + glog.Warningf("module %s does not have dir and it's not vendored, cannot discover the license URL. Report to go-licenses developer if you see this.", lib.module.Path) } else { // This is vendored. Handle this known special case. parentModDir := splits[0] @@ -148,11 +148,11 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string } } if parentPkg == nil { - glog.Warningf("cannot find parent package of vendored module %s", lib.Module.Path) + glog.Warningf("cannot find parent package of vendored module %s", lib.module.Path) } else { // Vendored modules should be commited in the parent module, so it counts as part of the // parent module. - lib.Module = parentPkg.Module + lib.module = newModule(parentPkg.Module) } } } @@ -205,7 +205,7 @@ func (l *Library) FileURL(ctx context.Context, filePath string) (string, error) wrap := func(err error) error { return fmt.Errorf("getting file URL in library %s: %w", l.Name(), err) } - m := l.Module + m := l.module if m == nil { return "", wrap(fmt.Errorf("empty go module info")) } diff --git a/licenses/library_test.go b/licenses/library_test.go index e2938d3..4f86c06 100644 --- a/licenses/library_test.go +++ b/licenses/library_test.go @@ -21,7 +21,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "golang.org/x/tools/go/packages" ) func TestLibraries(t *testing.T) { @@ -155,7 +154,7 @@ func TestLibraryFileURL(t *testing.T) { "github.com/google/trillian/crypto", }, LicensePath: "/go/src/github.com/google/trillian/LICENSE", - Module: &packages.Module{ + module: &Module{ Path: "github.com/google/trillian", Dir: "/go/src/github.com/google/trillian", Version: "v1.2.3", @@ -172,7 +171,7 @@ func TestLibraryFileURL(t *testing.T) { "bitbucket.org/user/project/pkg2", }, LicensePath: "/foo/bar/bitbucket.org/user/project/LICENSE", - Module: &packages.Module{ + module: &Module{ Path: "bitbucket.org/user/project", Dir: "/foo/bar/bitbucket.org/user/project", Version: "v1.2.3", @@ -189,7 +188,7 @@ func TestLibraryFileURL(t *testing.T) { "example.com/user/project/pkg2", }, LicensePath: "/foo/bar/example.com/user/project/LICENSE", - Module: &packages.Module{ + module: &Module{ Path: "example.com/user/project", Dir: "/foo/bar/example.com/user/project", Version: "v1.2.3", @@ -206,7 +205,7 @@ func TestLibraryFileURL(t *testing.T) { "github.com/google/trillian/crypto", }, LicensePath: "/go/src/github.com/google/trillian/LICENSE", - Module: &packages.Module{ + module: &Module{ Path: "github.com/google/trillian", Dir: "/go/src/github.com/google/trillian", }, @@ -221,7 +220,7 @@ func TestLibraryFileURL(t *testing.T) { "k8s.io/api/core/v1", }, LicensePath: "/go/modcache/k8s.io/api/LICENSE", - Module: &packages.Module{ + module: &Module{ Path: "k8s.io/api", Dir: "/go/modcache/k8s.io/api", Version: "v0.23.1", diff --git a/licenses/module.go b/licenses/module.go new file mode 100644 index 0000000..eaff1b5 --- /dev/null +++ b/licenses/module.go @@ -0,0 +1,69 @@ +// Copyright 2022 Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package licenses + +import ( + "strings" + + "golang.org/x/tools/go/packages" +) + +// Module provides module information for a package. +type Module struct { + // Differences from packages.Module: + // * Replace field is removed, it's only an implementation detail in this package. + // If a module is replaced, we'll directly return the replaced module. + // * Version field +incompatible suffix is trimmed. + // * Main, ModuleError, Time, Indirect, GoMod, GoVersion fields are removed, because they are not used. + Path string // module path + Version string // module version + Dir string // directory holding files for this module, if any +} + +func newModule(mod *packages.Module) *Module { + if mod == nil { + return nil + } + // Example of a module with replace directive: k8s.io/kubernetes => k8s.io/kubernetes v1.11.1 + // { + // "Path": "k8s.io/kubernetes", + // "Version": "v0.17.9", + // "Replace": { + // "Path": "k8s.io/kubernetes", + // "Version": "v1.11.1", + // "Time": "2018-07-17T04:20:29Z", + // "Dir": "/home/gongyuan_kubeflow_org/go/pkg/mod/k8s.io/kubernetes@v1.11.1", + // "GoMod": "/home/gongyuan_kubeflow_org/go/pkg/mod/cache/download/k8s.io/kubernetes/@v/v1.11.1.mod" + // }, + // "Dir": "/home/gongyuan_kubeflow_org/go/pkg/mod/k8s.io/kubernetes@v1.11.1", + // "GoMod": "/home/gongyuan_kubeflow_org/go/pkg/mod/cache/download/k8s.io/kubernetes/@v/v1.11.1.mod" + // } + // handle replace directives + // Note, we specifically want to replace version field. + // Haven't confirmed, but we may also need to override the + // entire struct when using replace directive with local folders. + tmp := *mod + if tmp.Replace != nil { + tmp = *tmp.Replace + } + // The +incompatible suffix does not affect module version. + // ref: https://golang.org/ref/mod#incompatible-versions + tmp.Version = strings.TrimSuffix(tmp.Version, "+incompatible") + return &Module{ + Path: tmp.Path, + Version: tmp.Version, + Dir: tmp.Dir, + } +} diff --git a/testdata/modules/replace04/licenses.csv b/testdata/modules/replace04/licenses.csv index a27f538..12a80a8 100644 --- a/testdata/modules/replace04/licenses.csv +++ b/testdata/modules/replace04/licenses.csv @@ -1,2 +1,2 @@ github.com/google/go-licenses/testdata/modules/replace04, https://github.com/google/go-licenses/blob/HEAD/testdata/modules/replace04/LICENSE, Apache-2.0 -github.com/mitchellh/go-homedir, https://github.com/mitchellh/go-homedir/blob/v1.1.0/LICENSE, MIT +github.com/mitchellh/go-homedir, https://github.com/mitchellh/go-homedir/blob/v1.0.0/LICENSE, MIT From a76c09bd4b586e43229515c46480a8ece4c0a058 Mon Sep 17 00:00:00 2001 From: Yuan Gong Date: Sun, 13 Feb 2022 07:43:09 +0000 Subject: [PATCH 7/8] chore: simplify code --- licenses/library.go | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/licenses/library.go b/licenses/library.go index 2bccc35..0cfe598 100644 --- a/licenses/library.go +++ b/licenses/library.go @@ -128,33 +128,33 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string } for _, pkg := range pkgs { lib.Packages = append(lib.Packages, pkg.PkgPath) - if lib.module == nil { + if lib.module == nil && pkg.Module != nil { // All the sub packages should belong to the same module. lib.module = newModule(pkg.Module) } - if lib.module != nil && lib.module.Path != "" && lib.module.Dir == "" { - // A known cause is that the module is vendored, so some information is lost. - splits := strings.SplitN(lib.LicensePath, "/vendor/", 2) - if len(splits) != 2 { - glog.Warningf("module %s does not have dir and it's not vendored, cannot discover the license URL. Report to go-licenses developer if you see this.", lib.module.Path) - } else { - // This is vendored. Handle this known special case. - parentModDir := splits[0] - var parentPkg *packages.Package - for _, rootPkg := range rootPkgs { - if rootPkg.Module != nil && rootPkg.Module.Dir == parentModDir { - parentPkg = rootPkg - break - } - } - if parentPkg == nil { - glog.Warningf("cannot find parent package of vendored module %s", lib.module.Path) - } else { - // Vendored modules should be commited in the parent module, so it counts as part of the - // parent module. - lib.module = newModule(parentPkg.Module) + } + if lib.module != nil && lib.module.Path != "" && lib.module.Dir == "" { + // A known cause is that the module is vendored, so some information is lost. + splits := strings.SplitN(lib.LicensePath, "/vendor/", 2) + if len(splits) != 2 { + glog.Warningf("module %s does not have dir and it's not vendored, cannot discover the license URL. Report to go-licenses developer if you see this.", lib.module.Path) + } else { + // This is vendored. Handle this known special case. + parentModDir := splits[0] + var parentPkg *packages.Package + for _, rootPkg := range rootPkgs { + if rootPkg.Module != nil && rootPkg.Module.Dir == parentModDir { + parentPkg = rootPkg + break } } + if parentPkg == nil { + glog.Warningf("cannot find parent package of vendored module %s", lib.module.Path) + } else { + // Vendored modules should be commited in the parent module, so it counts as part of the + // parent module. + lib.module = newModule(parentPkg.Module) + } } } libraries = append(libraries, lib) From 5bddcd8c398fd3c9b30a59aad5cdecaaf2690106 Mon Sep 17 00:00:00 2001 From: Yuan Gong Date: Thu, 24 Feb 2022 22:31:39 +0000 Subject: [PATCH 8/8] chore: address @wlynch final comments --- csv.go | 19 +++++-------- licenses/git.go | 3 ++ licenses/library.go | 13 +++++++++ testdata/modules/cli02/licenses.csv | 36 ++++++++++++------------ testdata/modules/hello01/licenses.csv | 2 +- testdata/modules/replace04/licenses.csv | 4 +-- testdata/modules/vendored03/licenses.csv | 4 +-- 7 files changed, 46 insertions(+), 35 deletions(-) diff --git a/csv.go b/csv.go index 34bf847..0581b50 100644 --- a/csv.go +++ b/csv.go @@ -16,9 +16,8 @@ package main import ( "context" - "fmt" + "encoding/csv" "os" - "strings" "github.com/golang/glog" "github.com/google/go-licenses/licenses" @@ -43,6 +42,8 @@ func init() { } func csvMain(_ *cobra.Command, args []string) error { + writer := csv.NewWriter(os.Stdout) + classifier, err := licenses.NewClassifier(confidenceThreshold) if err != nil { return err @@ -53,8 +54,8 @@ func csvMain(_ *cobra.Command, args []string) error { return err } for _, lib := range libs { - licenseName := "Unknown" licenseURL := "Unknown" + licenseName := "Unknown" if lib.LicensePath != "" { name, _, err := classifier.Identify(lib.LicensePath) if err == nil { @@ -69,16 +70,10 @@ func csvMain(_ *cobra.Command, args []string) error { glog.Warningf("Error discovering license URL: %s", err) } } - // Using ", " to join words makes vscode/terminal recognize the - // correct license URL. Otherwise, if there's no space after - // comma, vscode interprets the URL as concatenated with the - // license name after it. - // Also, the extra spaces does not affect csv syntax much, we - // can still copy the csv text and paste into Excel / Google - // Sheets. - if _, err := fmt.Fprintln(os.Stdout, strings.Join([]string{lib.Name(), licenseURL, licenseName}, ", ")); err != nil { + if err := writer.Write([]string{lib.Name(), licenseURL, licenseName}); err != nil { return err } } - return nil + writer.Flush() + return writer.Error() } diff --git a/licenses/git.go b/licenses/git.go index b8e60b1..79ba24e 100644 --- a/licenses/git.go +++ b/licenses/git.go @@ -46,6 +46,9 @@ type GitRepo struct { // FindGitRepo finds the Git repository that contains the specified filePath // by searching upwards through the directory tree for a ".git" directory. func FindGitRepo(filePath string) (*GitRepo, error) { + // TODO(Bobgy): the "/" is used just to fix the test. git.go is not + // currently used, but I plan to bring it back to detect version of the + // main module in following up PRs. path, err := findUpwards(filepath.Dir(filePath), gitRegexp, "/", nil) if err != nil { return nil, err diff --git a/licenses/library.go b/licenses/library.go index 0cfe598..bbfd290 100644 --- a/licenses/library.go +++ b/licenses/library.go @@ -140,6 +140,19 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string glog.Warningf("module %s does not have dir and it's not vendored, cannot discover the license URL. Report to go-licenses developer if you see this.", lib.module.Path) } else { // This is vendored. Handle this known special case. + + // Extra note why we identify a vendored package like this. + // + // For a normal package: + // * if it's not in a module, lib.module == nil + // * if it's in a module, lib.module.Dir != "" + // Only vendored modules will have lib.module != nil && lib.module.Path != "" && lib.module.Dir == "" as far as I know. + // So the if condition above is already very strict for vendored packages. + // On top of it, we checked the lib.LicensePath contains a vendor folder in it. + // So it's rare to have a false positive for both conditions at the same time, although it may happen in theory. + // + // These assumptions may change in the future, + // so we need to keep this updated with go tooling changes. parentModDir := splits[0] var parentPkg *packages.Package for _, rootPkg := range rootPkgs { diff --git a/testdata/modules/cli02/licenses.csv b/testdata/modules/cli02/licenses.csv index 2d42d4b..0cf35cf 100644 --- a/testdata/modules/cli02/licenses.csv +++ b/testdata/modules/cli02/licenses.csv @@ -1,18 +1,18 @@ -github.com/fsnotify/fsnotify, https://github.com/fsnotify/fsnotify/blob/v1.4.9/LICENSE, BSD-3-Clause -github.com/google/go-licenses/testdata/modules/cli02, https://github.com/google/go-licenses/blob/HEAD/testdata/modules/cli02/LICENSE, Apache-2.0 -github.com/hashicorp/hcl, https://github.com/hashicorp/hcl/blob/v1.0.0/LICENSE, MPL-2.0 -github.com/magiconair/properties, https://github.com/magiconair/properties/blob/v1.8.5/LICENSE.md, BSD-2-Clause -github.com/mitchellh/go-homedir, https://github.com/mitchellh/go-homedir/blob/v1.1.0/LICENSE, MIT -github.com/mitchellh/mapstructure, https://github.com/mitchellh/mapstructure/blob/v1.4.1/LICENSE, MIT -github.com/pelletier/go-toml, https://github.com/pelletier/go-toml/blob/v1.9.3/LICENSE, Apache-2.0 -github.com/spf13/afero, https://github.com/spf13/afero/blob/v1.6.0/LICENSE.txt, Apache-2.0 -github.com/spf13/cast, https://github.com/spf13/cast/blob/v1.3.1/LICENSE, MIT -github.com/spf13/cobra, https://github.com/spf13/cobra/blob/v1.1.3/LICENSE.txt, Apache-2.0 -github.com/spf13/jwalterweatherman, https://github.com/spf13/jwalterweatherman/blob/v1.1.0/LICENSE, MIT -github.com/spf13/pflag, https://github.com/spf13/pflag/blob/v1.0.5/LICENSE, BSD-3-Clause -github.com/spf13/viper, https://github.com/spf13/viper/blob/v1.8.0/LICENSE, MIT -github.com/subosito/gotenv, https://github.com/subosito/gotenv/blob/v1.2.0/LICENSE, MIT -golang.org/x/sys, https://cs.opensource.google/go/x/sys/+/977fb726:LICENSE, BSD-3-Clause -golang.org/x/text, https://cs.opensource.google/go/x/text/+/v0.3.5:LICENSE, BSD-3-Clause -gopkg.in/ini.v1, https://github.com/go-ini/ini/blob/v1.62.0/LICENSE, Apache-2.0 -gopkg.in/yaml.v2, https://github.com/go-yaml/yaml/blob/v2.4.0/LICENSE, Apache-2.0 +github.com/fsnotify/fsnotify,https://github.com/fsnotify/fsnotify/blob/v1.4.9/LICENSE,BSD-3-Clause +github.com/google/go-licenses/testdata/modules/cli02,https://github.com/google/go-licenses/blob/HEAD/testdata/modules/cli02/LICENSE,Apache-2.0 +github.com/hashicorp/hcl,https://github.com/hashicorp/hcl/blob/v1.0.0/LICENSE,MPL-2.0 +github.com/magiconair/properties,https://github.com/magiconair/properties/blob/v1.8.5/LICENSE.md,BSD-2-Clause +github.com/mitchellh/go-homedir,https://github.com/mitchellh/go-homedir/blob/v1.1.0/LICENSE,MIT +github.com/mitchellh/mapstructure,https://github.com/mitchellh/mapstructure/blob/v1.4.1/LICENSE,MIT +github.com/pelletier/go-toml,https://github.com/pelletier/go-toml/blob/v1.9.3/LICENSE,Apache-2.0 +github.com/spf13/afero,https://github.com/spf13/afero/blob/v1.6.0/LICENSE.txt,Apache-2.0 +github.com/spf13/cast,https://github.com/spf13/cast/blob/v1.3.1/LICENSE,MIT +github.com/spf13/cobra,https://github.com/spf13/cobra/blob/v1.1.3/LICENSE.txt,Apache-2.0 +github.com/spf13/jwalterweatherman,https://github.com/spf13/jwalterweatherman/blob/v1.1.0/LICENSE,MIT +github.com/spf13/pflag,https://github.com/spf13/pflag/blob/v1.0.5/LICENSE,BSD-3-Clause +github.com/spf13/viper,https://github.com/spf13/viper/blob/v1.8.0/LICENSE,MIT +github.com/subosito/gotenv,https://github.com/subosito/gotenv/blob/v1.2.0/LICENSE,MIT +golang.org/x/sys,https://cs.opensource.google/go/x/sys/+/977fb726:LICENSE,BSD-3-Clause +golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.3.5:LICENSE,BSD-3-Clause +gopkg.in/ini.v1,https://github.com/go-ini/ini/blob/v1.62.0/LICENSE,Apache-2.0 +gopkg.in/yaml.v2,https://github.com/go-yaml/yaml/blob/v2.4.0/LICENSE,Apache-2.0 diff --git a/testdata/modules/hello01/licenses.csv b/testdata/modules/hello01/licenses.csv index 4aa9d5e..914932f 100644 --- a/testdata/modules/hello01/licenses.csv +++ b/testdata/modules/hello01/licenses.csv @@ -1 +1 @@ -github.com/google/go-licenses/testdata/modules/hello01, https://github.com/google/go-licenses/blob/HEAD/testdata/modules/hello01/LICENSE, Apache-2.0 +github.com/google/go-licenses/testdata/modules/hello01,https://github.com/google/go-licenses/blob/HEAD/testdata/modules/hello01/LICENSE,Apache-2.0 diff --git a/testdata/modules/replace04/licenses.csv b/testdata/modules/replace04/licenses.csv index 12a80a8..9c5f0db 100644 --- a/testdata/modules/replace04/licenses.csv +++ b/testdata/modules/replace04/licenses.csv @@ -1,2 +1,2 @@ -github.com/google/go-licenses/testdata/modules/replace04, https://github.com/google/go-licenses/blob/HEAD/testdata/modules/replace04/LICENSE, Apache-2.0 -github.com/mitchellh/go-homedir, https://github.com/mitchellh/go-homedir/blob/v1.0.0/LICENSE, MIT +github.com/google/go-licenses/testdata/modules/replace04,https://github.com/google/go-licenses/blob/HEAD/testdata/modules/replace04/LICENSE,Apache-2.0 +github.com/mitchellh/go-homedir,https://github.com/mitchellh/go-homedir/blob/v1.0.0/LICENSE,MIT diff --git a/testdata/modules/vendored03/licenses.csv b/testdata/modules/vendored03/licenses.csv index c2db284..a05acad 100644 --- a/testdata/modules/vendored03/licenses.csv +++ b/testdata/modules/vendored03/licenses.csv @@ -1,2 +1,2 @@ -github.com/google/go-licenses/testdata/modules/vendored03, https://github.com/google/go-licenses/blob/HEAD/testdata/modules/vendored03/LICENSE, Apache-2.0 -github.com/mitchellh/go-homedir, https://github.com/google/go-licenses/blob/HEAD/testdata/modules/vendored03/vendor/github.com/mitchellh/go-homedir/LICENSE, MIT +github.com/google/go-licenses/testdata/modules/vendored03,https://github.com/google/go-licenses/blob/HEAD/testdata/modules/vendored03/LICENSE,Apache-2.0 +github.com/mitchellh/go-homedir,https://github.com/google/go-licenses/blob/HEAD/testdata/modules/vendored03/vendor/github.com/mitchellh/go-homedir/LICENSE,MIT