Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!(v2): use go modules info #94

Merged
merged 8 commits into from
Feb 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 10 additions & 30 deletions csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"context"
"encoding/csv"
"os"
"strings"

"github.com/golang/glog"
"github.com/google/go-licenses/licenses"
Expand Down Expand Up @@ -58,39 +57,20 @@ func csvMain(_ *cobra.Command, args []string) error {
licenseURL := "Unknown"
licenseName := "Unknown"
if lib.LicensePath != "" {
// Find a URL for the license file, based on the URL of a remote for the Git repository.
var errs []string
repo, err := licenses.FindGitRepo(lib.LicensePath)
if err != nil {
// Can't find Git repo (possibly a Go Module?) - derive URL from lib name instead.
lURL, err := lib.FileURL(lib.LicensePath)
if err != nil {
errs = append(errs, err.Error())
} else {
licenseURL = lURL.String()
}
name, _, err := classifier.Identify(lib.LicensePath)
if err == nil {
licenseName = name
} else {
for _, remote := range gitRemotes {
url, err := repo.FileURL(lib.LicensePath, remote)
if err != nil {
errs = append(errs, err.Error())
continue
}
licenseURL = url.String()
break
}
}
if licenseURL == "Unknown" {
glog.Errorf("Error discovering URL for %q:\n- %s", lib.LicensePath, strings.Join(errs, "\n- "))
}
licenseName, _, err = classifier.Identify(lib.LicensePath)
if err != nil {
glog.Errorf("Error identifying license in %q: %v", lib.LicensePath, err)
licenseName = "Unknown"
}
url, err := lib.FileURL(context.Background(), lib.LicensePath)
if err == nil {
licenseURL = url
} else {
glog.Warningf("Error discovering license URL: %s", err)
wlynch marked this conversation as resolved.
Show resolved Hide resolved
}
}
// Remove the "*/vendor/" prefix from the library name for conciseness.
if err := writer.Write([]string{unvendor(lib.Name()), licenseURL, licenseName}); err != nil {
if err := writer.Write([]string{lib.Name(), licenseURL, licenseName}); err != nil {
return err
}
}
Expand Down
1 change: 1 addition & 0 deletions internal/third_party/pkgsite/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ Local modifications:
pkgsite/internal/version to avoid other dependencies.
- For pkgsite/internal/source, switched to use go log package, because glog conflicts with a test
dependency that also defines the "v" flag.
- Add a SetCommit method to type Info in ./source/source_patch.go; the rationale is explained in the method's comments.
33 changes: 33 additions & 0 deletions internal/third_party/pkgsite/source/source_patch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package source

// This file includes all local additions to source package for google/go-licenses use-cases.

// SetCommit replaces the commit recorded on i with the given commit, which may
// be any commit, branch or tag name, for example info.SetCommit("master").
//
// Normally the version should be passed to ModuleInfo() instead. Use SetCommit
// only when the version is unknown and a link that points to a known
// commit/branch/tag is wanted.
//
// This is not the same as passing "master" as the version to ModuleInfo(): for
// a module that is not at the root of its repo, convention prefixes the git tag
// with the module's relative dir. For example, a sub module at ./submod whose
// version is v1.0.1 is tagged submod/v1.0.1.
//
// Calling SetCommit on a nil *Info is a no-op.
func (i *Info) SetCommit(commit string) {
	if i != nil {
		i.commit = commit
	}
}
47 changes: 21 additions & 26 deletions licenses/find.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,34 @@ package licenses

import (
"fmt"
"go/build"
"io/ioutil"
"path/filepath"
"regexp"
"strings"
)

var (
licenseRegexp = regexp.MustCompile(`^(?i)(LICEN(S|C)E|COPYING|README|NOTICE).*$`)
srcDirRegexps = func() []*regexp.Regexp {
var rs []*regexp.Regexp
for _, s := range build.Default.SrcDirs() {
rs = append(rs, regexp.MustCompile("^"+regexp.QuoteMeta(s)+"$"))
}
return rs
}()
vendorRegexp = regexp.MustCompile(`.+/vendor(/)?$`)
)

// Find returns the file path of the license for this package.
func Find(dir string, classifier Classifier) (string, error) {
var stopAt []*regexp.Regexp
stopAt = append(stopAt, srcDirRegexps...)
stopAt = append(stopAt, vendorRegexp)
wlynch marked this conversation as resolved.
Show resolved Hide resolved
return findUpwards(dir, licenseRegexp, stopAt, func(path string) bool {
//
// dir is path of the directory where we want to find a license.
// rootDir is path of the module containing this package. Find will not search out of the
// rootDir.
func Find(dir string, rootDir string, classifier Classifier) (string, error) {
dir, err := filepath.Abs(dir)
if err != nil {
return "", err
}
rootDir, err = filepath.Abs(rootDir)
if err != nil {
return "", err
}
if !strings.HasPrefix(dir, rootDir) {
return "", fmt.Errorf("licenses.Find: rootDir %s should contain dir %s", rootDir, dir)
}
return findUpwards(dir, licenseRegexp, rootDir, func(path string) bool {
// TODO(RJPercival): Return license details
if _, _, err := classifier.Identify(path); err != nil {
return false
Expand All @@ -48,15 +52,15 @@ func Find(dir string, classifier Classifier) (string, error) {
})
}

func findUpwards(dir string, r *regexp.Regexp, stopAt []*regexp.Regexp, predicate func(path string) bool) (string, error) {
func findUpwards(dir string, r *regexp.Regexp, stopAt string, predicate func(path string) bool) (string, error) {
// Dir must be made absolute for reliable prefix matching against stopAt
dir, err := filepath.Abs(dir)
if err != nil {
return "", err
}
start := dir
// Stop once dir matches a stopAt regexp or dir is the filesystem root
for !matchAny(stopAt, dir) {
// Stop once we go out of the stopAt dir.
for strings.HasPrefix(dir, stopAt) {
dirContents, err := ioutil.ReadDir(dir)
if err != nil {
return "", err
Expand All @@ -79,12 +83,3 @@ func findUpwards(dir string, r *regexp.Regexp, stopAt []*regexp.Regexp, predicat
}
return "", fmt.Errorf("no file/directory matching regexp %q found for %s", r, start)
}

// matchAny reports whether s is matched by at least one of the given patterns.
// It returns false when patterns is empty.
func matchAny(patterns []*regexp.Regexp, s string) bool {
	matched := false
	// Stop scanning as soon as one pattern has matched.
	for idx := 0; idx < len(patterns) && !matched; idx++ {
		matched = patterns[idx].MatchString(s)
	}
	return matched
}
2 changes: 1 addition & 1 deletion licenses/find_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func TestFind(t *testing.T) {
},
} {
t.Run(test.desc, func(t *testing.T) {
licensePath, err := Find(test.dir, classifier)
licensePath, err := Find(test.dir, "./testdata", classifier)
if err != nil || licensePath != test.wantLicensePath {
t.Fatalf("Find(%q) = (%#v, %q), want (%q, nil)", test.dir, licensePath, err, test.wantLicensePath)
}
Expand Down
5 changes: 4 additions & 1 deletion licenses/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ type GitRepo struct {
// FindGitRepo finds the Git repository that contains the specified filePath
// by searching upwards through the directory tree for a ".git" directory.
func FindGitRepo(filePath string) (*GitRepo, error) {
path, err := findUpwards(filepath.Dir(filePath), gitRegexp, srcDirRegexps, nil)
// TODO(Bobgy): the "/" is used just to fix the test. git.go is not
// currently used, but I plan to bring it back to detect version of the
// main module in following up PRs.
path, err := findUpwards(filepath.Dir(filePath), gitRegexp, "/", nil)
Bobgy marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err
}
Expand Down
131 changes: 98 additions & 33 deletions licenses/library.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,31 +18,25 @@ import (
"context"
"fmt"
"go/build"
"net/url"
"path"
"path/filepath"
"sort"
"strings"
"time"

"github.com/golang/glog"
"github.com/google/go-licenses/internal/third_party/pkgsite/source"
"golang.org/x/tools/go/packages"
)

var (
// TODO(RJPercival): Support replacing "master" with Go Module version
repoPathPrefixes = map[string]string{
"github.com": "blob/master/",
"bitbucket.org": "src/master/",
}
)

// Library is a collection of packages covered by the same license file.
type Library struct {
// LicensePath is the path of the file containing the library's license.
LicensePath string
// Packages contains import paths for Go packages in this library.
// It may not be the complete set of all packages in the library.
Packages []string
// module is the Go module that contains this library's packages. For a
// vendored module it is set to the parent module that vendors it. It may
// be nil when no module information is available.
module *Module
}

// PackagesError aggregates all Packages[].Errors into a single error.
Expand All @@ -68,7 +62,7 @@ func (e PackagesError) Error() string {
func Libraries(ctx context.Context, classifier Classifier, importPaths ...string) ([]*Library, error) {
cfg := &packages.Config{
Context: ctx,
Mode: packages.NeedImports | packages.NeedDeps | packages.NeedFiles | packages.NeedName,
Mode: packages.NeedImports | packages.NeedDeps | packages.NeedFiles | packages.NeedName | packages.NeedModule,
}

rootPkgs, err := packages.Load(cfg, importPaths...)
Expand Down Expand Up @@ -103,7 +97,7 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string
// This package is empty - nothing to do.
return true
}
licensePath, err := Find(pkgDir, classifier)
licensePath, err := Find(pkgDir, p.Module.Dir, classifier)
if err != nil {
glog.Errorf("Failed to find license for %s: %v", p.PkgPath, err)
}
Expand All @@ -124,6 +118,7 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string
for _, p := range pkgs {
libraries = append(libraries, &Library{
Packages: []string{p.PkgPath},
module: newModule(p.Module),
})
}
continue
Expand All @@ -133,6 +128,47 @@ func Libraries(ctx context.Context, classifier Classifier, importPaths ...string
}
for _, pkg := range pkgs {
lib.Packages = append(lib.Packages, pkg.PkgPath)
if lib.module == nil && pkg.Module != nil {
// All the sub packages should belong to the same module.
lib.module = newModule(pkg.Module)
}
}
if lib.module != nil && lib.module.Path != "" && lib.module.Dir == "" {
// A known cause is that the module is vendored, so some information is lost.
splits := strings.SplitN(lib.LicensePath, "/vendor/", 2)
if len(splits) != 2 {
glog.Warningf("module %s does not have dir and it's not vendored, cannot discover the license URL. Report to go-licenses developer if you see this.", lib.module.Path)
} else {
// This is vendored. Handle this known special case.

// Extra note why we identify a vendored package like this.
//
// For a normal package:
// * if it's not in a module, lib.module == nil
// * if it's in a module, lib.module.Dir != ""
// Only vendored modules will have lib.module != nil && lib.module.Path != "" && lib.module.Dir == "" as far as I know.
// So the if condition above is already very strict for vendored packages.
// On top of it, we checked the lib.LicensePath contains a vendor folder in it.
// So it's rare to have a false positive for both conditions at the same time, although it may happen in theory.
//
// These assumptions may change in the future,
// so we need to keep this updated with go tooling changes.
parentModDir := splits[0]
var parentPkg *packages.Package
for _, rootPkg := range rootPkgs {
if rootPkg.Module != nil && rootPkg.Module.Dir == parentModDir {
parentPkg = rootPkg
break
}
}
if parentPkg == nil {
glog.Warningf("cannot find parent package of vendored module %s", lib.module.Path)
} else {
// Vendored modules should be committed in the parent module, so they count as part of the
// parent module.
lib.module = newModule(parentPkg.Module)
}
}
}
libraries = append(libraries, lib)
}
Expand Down Expand Up @@ -173,35 +209,64 @@ func (l *Library) String() string {
return l.Name()
}

// FileURL attempts to determine the URL for a file in this library.
// This only works for certain supported package prefixes, such as github.com,
// bitbucket.org and googlesource.com. Prefer GitRepo.FileURL() if possible.
func (l *Library) FileURL(filePath string) (*url.URL, error) {
relFilePath, err := filepath.Rel(filepath.Dir(l.LicensePath), filePath)
if err != nil {
return nil, err
// FileURL attempts to determine the URL for a file in this library using
// go module name and version.
func (l *Library) FileURL(ctx context.Context, filePath string) (string, error) {
if l == nil {
return "", fmt.Errorf("library is nil")
}
wrap := func(err error) error {
return fmt.Errorf("getting file URL in library %s: %w", l.Name(), err)
}
m := l.module
if m == nil {
return "", wrap(fmt.Errorf("empty go module info"))
}
if m.Dir == "" {
return "", wrap(fmt.Errorf("empty go module dir"))
}
nameParts := strings.SplitN(l.Name(), "/", 4)
if len(nameParts) < 3 {
return nil, fmt.Errorf("cannot determine URL for %q package", l.Name())
client := source.NewClient(time.Second * 20)
remote, err := source.ModuleInfo(ctx, client, m.Path, m.Version)
if err != nil {
return "", wrap(err)
}
host, user, project := nameParts[0], nameParts[1], nameParts[2]
pathPrefix, ok := repoPathPrefixes[host]
if !ok {
return nil, fmt.Errorf("unsupported package host %q for %q", host, l.Name())
if m.Version == "" {
// This always happens for the module in development.
// Note#1 if we pass version=HEAD to source.ModuleInfo, github tag for modules not at the root
// of the repo will be incorrect, because there's a convention that:
// * I have a module at github.com/google/go-licenses/submod.
// * The module is of version v1.0.0.
// Then the github tag should be submod/v1.0.0.
// In our case, if we pass HEAD as version, the result commit will be submod/HEAD which is incorrect.
// Therefore, to work around this problem, we directly set the commit after getting module info.
//
// Note#2 repos have different branches as default, some use the
// master branch and some use the main branch. However, HEAD
// always refers to the default branch, so it's better than
// both of master/main when we do not know which branch is default.
// Examples:
// * https://github.com/google/go-licenses/blob/HEAD/LICENSE
// points to latest commit of master branch.
// * https://github.com/google/licenseclassifier/blob/HEAD/LICENSE
// points to latest commit of main branch.
remote.SetCommit("HEAD")
glog.Warningf("module %s has empty version, defaults to HEAD. The license URL may be incorrect. Please verify!", m.Path)
}
if len(nameParts) == 4 {
pathPrefix = path.Join(pathPrefix, nameParts[3])
relativePath, err := filepath.Rel(m.Dir, filePath)
if err != nil {
return "", wrap(err)
}
return &url.URL{
Scheme: "https",
Host: host,
Path: path.Join(user, project, pathPrefix, relFilePath),
}, nil
// TODO: there are still rare cases this may result in an incorrect URL.
// https://github.com/google/go-licenses/issues/73#issuecomment-1005587408
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be worthwhile to document examples of the types of URLs that are problematic (I didn't get a clear sense what "major branch conventions" meant from the comment)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, let me expand on the comment and leave a TODO in v2 roadmap to document known caveats.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated in #73 (comment), note I have already built workarounds for these special cases in follow up PRs.

return remote.FileURL(relativePath), nil
}

// isStdLib returns true if this package is part of the Go standard library.
func isStdLib(pkg *packages.Package) bool {
if pkg.Name == "unsafe" {
// Special case unsafe stdlib, because it does not contain go files.
return true
}
if len(pkg.GoFiles) == 0 {
return false
}
Expand Down
Loading