diff --git a/.github/README.md b/.github/README.md index 26e590ba..850d65b1 100644 --- a/.github/README.md +++ b/.github/README.md @@ -21,17 +21,17 @@ OpenSCA is intended for scanning the third-party component dependencies and vuln OpenSCA is now capable of parsing configuration files in the listed programming languages and correspondent package managers. The project team is now dedicated to introducing more languages and enriching the parsing of relevant configuration files gradually. -| LANGUAGE | PACKAGE MANAGER | FILE | -| ------------ | --------------- | ---------------------------------------------- | -| `Java` | `Maven` | `pom.xml` | -| `Java` | `Gradle` | `.gradle` `.gradle.kts` | -| `JavaScript` | `Npm` | `package-lock.json` `package.json` `yarn.lock` | -| `PHP` | `Composer` | `composer.json` `composer.lock` | -| `Ruby` | `gem` | `gemfile.lock` | -| `Golang` | `gomod` | `go.mod` `go.sum` | -| `Rust` | `cargo` | `Cargo.lock` | -| `Erlang` | `Rebar` | `rebar.lock` | -| `Python` | `Pip` | `Pipfile` `Pipfile.lock` `setup.py` | +| LANGUAGE | PACKAGE MANAGER | FILE | +| ------------ | --------------- | ------------------------------------------------------------ | +| `Java` | `Maven` | `pom.xml` | +| `Java` | `Gradle` | `.gradle` `.gradle.kts` | +| `JavaScript` | `Npm` | `package-lock.json` `package.json` `yarn.lock` | +| `PHP` | `Composer` | `composer.json` `composer.lock` | +| `Ruby` | `gem` | `gemfile.lock` | +| `Golang` | `gomod` | `go.mod` `go.sum` | +| `Rust` | `cargo` | `Cargo.lock` | +| `Erlang` | `Rebar` | `rebar.lock` | +| `Python` | `Pip` | `Pipfile` `Pipfile.lock` `setup.py` `requirements.txt` `requirements.in` (the latter two require `pipenv` to be installed in advance) | ## Download and Deployment diff --git a/README.md b/README.md index 16a429c9..16c22ac9 100644 --- a/README.md +++ b/README.md @@ -17,17 +17,17 @@ `OpenSCA`现已支持以下编程语言相关的配置文件解析及对应的包管理器,后续会逐步支持更多的编程语言,丰富相关配置文件的解析。 -| 支持语言 | 包管理器 | 解析文件 | -| ------------ | ---------- | 
---------------------------------------------- | -| `Java` | `Maven` | `pom.xml` | -| `Java` | `Gradle` | `.gradle` `.gradle.kts` | -| `JavaScript` | `Npm` | `package-lock.json` `package.json` `yarn.lock` | -| `PHP` | `Composer` | `composer.json` `composer.lock` | -| `Ruby` | `gem` | `gemfile.lock` | -| `Golang` | `gomod` | `go.mod` `go.sum` | -| `Rust` | `cargo` | `Cargo.lock` | -| `Erlang` | `Rebar` | `rebar.lock` | -| `Python` | `Pip` | `Pipfile` `Pipfile.lock` `setup.py` | +| 支持语言 | 包管理器 | 解析文件 | +| ------------ | ---------- | ------------------------------------------------------------ | +| `Java` | `Maven` | `pom.xml` | +| `Java` | `Gradle` | `.gradle` `.gradle.kts` | +| `JavaScript` | `Npm` | `package-lock.json` `package.json` `yarn.lock` | +| `PHP` | `Composer` | `composer.json` `composer.lock` | +| `Ruby` | `gem` | `gemfile.lock` | +| `Golang` | `gomod` | `go.mod` `go.sum` | +| `Rust` | `cargo` | `Cargo.lock` | +| `Erlang` | `Rebar` | `rebar.lock` | +| `Python` | `Pip` | `Pipfile` `Pipfile.lock` `setup.py` `requirements.txt` `requirements.in`(后两者需要pipenv环境,需要联网) | ## 下载安装 @@ -75,18 +75,18 @@ opensca-cli -db db.json -path ${project_path} **可在配置文件中配置参数,也可在命令行输入参数,两者冲突时优先使用输入参数** -| 参数 | 类型 | 描述 | 使用样例 | -| ---------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------- | -| `config` | `string` | 指定配置文件路径,程序启动时将配置文件中的参数作为启动参数,配置参数与命令行输入参数冲突时优先使用输入参数 | `-config config.json` | -| `path` | `string` | 指定要检测的文件或目录路径 | `-path ./foo` | -| `url` | `string` | 从云漏洞库查询漏洞,指定要连接云服务的地址,与 `token` 参数一起使用 | `-url https://opensca.xmirror.cn` | -| `token` | `string` | 云服务验证 `token`,需要在云服务平台申请,与 `url` 参数一起使用 | `-token xxxxxxx` | -| `cache` | `bool` | 建议开启,缓存下载的文件(例如 `.pom` 文件),重复检测相同组件时会节省时间,下载的文件会保存到工具所在目录的.cache 目录下 | `-cache` | -| `vuln` | `bool` | 结果仅保留有漏洞信息的组件,使用该参数将不会保留组件层级结构 | `-vuln` | -| `out` | `string` | 
将检测结果保存到指定文件,根据后缀生成不同格式的文件,默认为 `json` 格式;;支持以`spdx`格式展示`sbom`清单只需更换相应输出文件后缀即可 | `-out output.json` | +| 参数 | 类型 | 描述 | 使用样例 | +| ---------- | -------- | ------------------------------------------------------------ | --------------------------------- | +| `config` | `string` | 指定配置文件路径,程序启动时将配置文件中的参数作为启动参数,配置参数与命令行输入参数冲突时优先使用输入参数 | `-config config.json` | +| `path` | `string` | 指定要检测的文件或目录路径 | `-path ./foo` | +| `url` | `string` | 从云漏洞库查询漏洞,指定要连接云服务的地址,与 `token` 参数一起使用 | `-url https://opensca.xmirror.cn` | +| `token` | `string` | 云服务验证 `token`,需要在云服务平台申请,与 `url` 参数一起使用 | `-token xxxxxxx` | +| `cache` | `bool` | 建议开启,缓存下载的文件(例如 `.pom` 文件),重复检测相同组件时会节省时间,下载的文件会保存到工具所在目录的.cache 目录下 | `-cache` | +| `vuln` | `bool` | 结果仅保留有漏洞信息的组件,使用该参数将不会保留组件层级结构 | `-vuln` | +| `out` | `string` | 将检测结果保存到指定文件,根据后缀生成不同格式的文件,默认为 `json` 格式;支持以`spdx`格式展示`sbom`清单只需更换相应输出文件后缀即可 | `-out output.json` | | `db` | `string` | 指定本地漏洞库文件,希望使用自己漏洞库时可用,漏洞库文件为 `json` 格式,具体格式会在之后给出;若同时使用云端漏洞库与本地漏洞库,漏洞查询结果取并集 | `-db db.json` | -| `progress` | `bool` | 显示进度条 | `-progress` | -| `dedup` | `bool` | 相同组件去重 | `-dedup` | +| `progress` | `bool` | 显示进度条 | `-progress` | +| `dedup` | `bool` | 相同组件去重 | `-dedup` | --- diff --git a/analyzer/engine/engine.go b/analyzer/engine/engine.go index 4a065050..4f6c7d48 100644 --- a/analyzer/engine/engine.go +++ b/analyzer/engine/engine.go @@ -56,8 +56,9 @@ func (e Engine) ParseFile(filepath string) (depRoot *model.DepTree, taskInfo rep // 目录树 dirRoot := model.NewDirTree() depRoot = model.NewDepTree(nil) + filepath = strings.ReplaceAll(filepath, `\`, `/`) taskInfo = report.TaskInfo{ - AppName: filepath, + AppName: strings.TrimSuffix(path.Base(filepath), path.Ext(path.Base(filepath))), StartTime: time.Now().Format("2006-01-02 15:04:05"), } s := time.Now() diff --git a/analyzer/java/ext.go b/analyzer/java/ext.go index 2afd04fa..8a6a5d35 100644 --- a/analyzer/java/ext.go +++ b/analyzer/java/ext.go @@ -3,158 +3,174 @@ * @Date: 2021-12-16 10:10:13 */ -package java + package java 
-import ( - "bytes" - "fmt" - "io/ioutil" - "net/http" - "os" - "os/exec" - "regexp" - "strings" - "util/cache" - "util/enum/language" - "util/logs" - "util/model" - "util/temp" - - "github.com/pkg/errors" -) - -// MvnDepTree 调用mvn解析项目获取依赖树 -func MvnDepTree(path string, root *model.DepTree) { - pwd := temp.GetPwd() - os.Chdir(path) - cmd := exec.Command("mvn", "dependency:tree", "--fail-never") - out, _ := cmd.CombinedOutput() - os.Chdir(pwd) - // 统一替换换行符为\n - out = bytes.ReplaceAll(out, []byte("\r\n"), []byte("\n")) - out = bytes.ReplaceAll(out, []byte("\n\r"), []byte("\n")) - out = bytes.ReplaceAll(out, []byte("\r"), []byte("\n")) - // 获取mvn解析内容 - lines := strings.Split(string(out), "\n") - for i := range lines { - lines[i] = strings.TrimPrefix(lines[i], "[INFO] ") - } - // 捕获依赖树起始位置 - title := regexp.MustCompile(`--- [^\n]+ ---`) - // 记录依赖树起始位置行号 - start := 0 - // 标记是否在依赖范围内树 - tree := false - root.Direct = true - // 获取mvn依赖树 - for i, line := range lines { - if title.MatchString(line) { - tree = true - start = i - continue - } - if tree && strings.Trim(line, "-") == "" { - tree = false - buildMvnDepTree(root, lines[start+1:i]) - for _, c := range root.Children { - c.Direct = true - } - continue - } - } - return -} - -// buildMvnDepTree 构建mvn树 -func buildMvnDepTree(root *model.DepTree, lines []string) { - // 记录当前的顶点节点列表 - tops := []*model.DepTree{root} - // 上一层级 - lastLevel := -1 - for _, line := range lines { - // 计算层级 - level := 0 - for line[level*3+2] == ' ' { - level++ - } - tops = tops[:len(tops)-lastLevel+level-1] - root = tops[len(tops)-1] - tags := strings.Split(line[level*3:], ":") - if len(tags) < 4 { - logs.Error(errors.New("mvn parse error")) - break - } - dep := model.NewDepTree(root) - dep.Vendor = tags[0] - dep.Name = tags[1] - dep.Version = model.NewVersion(tags[3]) - dep.Language = language.Java - tops = append(tops, dep) - lastLevel = level - } -} - -// downloadPom 下载pom文件 -func downloadPom(dep model.Dependency, repos ...string) (data []byte, 
err error) { - if repos == nil { - repos = []string{} - } - tags := strings.Split(dep.Vendor, ".") - tags = append(tags, dep.Name) - tags = append(tags, dep.Version.Org) - tags = append(tags, fmt.Sprintf("%s-%s.pom", dep.Name, dep.Version.Org)) - // 遍历仓库地址, 默认maven仓库 - for i, repo := range append(repos, `https://repo.maven.apache.org/maven2/`) { - // 是否是最后一个仓库(默认的maven仓库) - last := i == len(repos) - // 拼接完整的pom下载地址 - url := repo + strings.Join(tags, "/") - if rep, err := http.Get(url); err != nil { - if last { - return nil, err - } else { - continue - } - } else { - defer rep.Body.Close() - if rep.StatusCode != 200 { - if last { - return ioutil.ReadAll(rep.Body) - } else { - continue - } - } else { - return ioutil.ReadAll(rep.Body) - } - } - } - // 应该走不到这里 - return nil, fmt.Errorf("download failure") -} - -// getpom is get pom from index -func getpom(groupId, artifactId, version string) (p *Pom) { - p = &Pom{Properties: PomProperties{}} - if groupId == "" || artifactId == "" || version == "" { - return nil - } - dep := model.Dependency{ - Vendor: groupId, - Name: artifactId, - Version: model.NewVersion(version), - } - data := cache.LoadCache(dep) - if len(data) != 0 { - return ReadPom(data) - } else { - // 无本地缓存下载pom文件 - if data, err := downloadPom(dep); err == nil { - // 保存pom文件 - cache.SaveCache(dep, data) - return ReadPom(data) - } else { - logs.Warn(err) - } - } - return nil -} + import ( + "bytes" + "crypto/tls" + "fmt" + "io/ioutil" + "net/http" + "os" + "os/exec" + "regexp" + "strings" + "time" + "util/args" + "util/cache" + "util/enum/language" + "util/logs" + "util/model" + "util/temp" + + "github.com/pkg/errors" + ) + + // MvnDepTree 调用mvn解析项目获取依赖树 + func MvnDepTree(path string, root *model.DepTree) { + pwd := temp.GetPwd() + os.Chdir(path) + cmd := exec.Command("mvn", "dependency:tree", "--fail-never") + out, _ := cmd.CombinedOutput() + os.Chdir(pwd) + // 统一替换换行符为\n + out = bytes.ReplaceAll(out, []byte("\r\n"), []byte("\n")) + out = bytes.ReplaceAll(out, 
[]byte("\n\r"), []byte("\n")) + out = bytes.ReplaceAll(out, []byte("\r"), []byte("\n")) + // 获取mvn解析内容 + lines := strings.Split(string(out), "\n") + for i := range lines { + lines[i] = strings.TrimPrefix(lines[i], "[INFO] ") + } + // 捕获依赖树起始位置 + title := regexp.MustCompile(`--- [^\n]+ ---`) + // 记录依赖树起始位置行号 + start := 0 + // 标记是否在依赖范围内树 + tree := false + root.Direct = true + // 获取mvn依赖树 + for i, line := range lines { + if title.MatchString(line) { + tree = true + start = i + continue + } + if tree && strings.Trim(line, "-") == "" { + tree = false + buildMvnDepTree(root, lines[start+1:i]) + for _, c := range root.Children { + c.Direct = true + } + continue + } + } + return + } + + // buildMvnDepTree 构建mvn树 + func buildMvnDepTree(root *model.DepTree, lines []string) { + // 记录当前的顶点节点列表 + tops := []*model.DepTree{root} + // 上一层级 + lastLevel := -1 + for _, line := range lines { + // 计算层级 + level := 0 + for line[level*3+2] == ' ' { + level++ + } + tops = tops[:len(tops)-lastLevel+level-1] + root = tops[len(tops)-1] + tags := strings.Split(line[level*3:], ":") + if len(tags) < 4 { + logs.Error(errors.New("mvn parse error")) + break + } + dep := model.NewDepTree(root) + dep.Vendor = tags[0] + dep.Name = tags[1] + dep.Version = model.NewVersion(tags[3]) + dep.Language = language.Java + tops = append(tops, dep) + lastLevel = level + } + } + + // downloadPom 下载pom文件 + func downloadPom(dep model.Dependency) (data []byte, err error) { + tags := strings.Split(dep.Vendor, ".") + tags = append(tags, dep.Name) + tags = append(tags, dep.Version.Org) + tags = append(tags, fmt.Sprintf("%s-%s.pom", dep.Name, dep.Version.Org)) + // 先扫描指定仓库 + for _, m := range args.Config.Maven { + url := strings.TrimSuffix(m.Repo, `/`) + `/` + url = url + strings.Join(tags, "/") + name := m.User + password := m.Password + data, err = getFromRepo(url, name, password) + if data == nil { + continue + } + return + } + // 指定仓库都没有就去官方仓库查询 + d := `https://repo.maven.apache.org/maven2/` + url := d + 
strings.Join(tags, "/") + if rep, err := http.Get(url); err != nil { + return nil, err + } else { + defer rep.Body.Close() + if rep.StatusCode == 200 { + return ioutil.ReadAll(rep.Body) + } + } + // 应该走不到这里 + return nil, fmt.Errorf("download failure") + } + + // 从私服库获取pom文件 + func getFromRepo(url string, name string, password string) (data []byte, err error) { + c := http.Client{Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}, Timeout: time.Duration(1 * time.Second)} + resp, err := c.Get(url) + if err != nil { + return nil, err + } else { + resp.Request.SetBasicAuth(name, password) + defer resp.Body.Close() + logs.Debug(fmt.Sprintf("status code: %d url: %s", resp.StatusCode, url)) + if resp.StatusCode == 200 { + return ioutil.ReadAll(resp.Body) + } + } + return nil, fmt.Errorf("download from repository failure") + } + + // getpom is get pom from index + func getpom(groupId, artifactId, version string) (p *Pom) { + p = &Pom{Properties: PomProperties{}} + if groupId == "" || artifactId == "" || version == "" { + return nil + } + dep := model.Dependency{ + Vendor: groupId, + Name: artifactId, + Version: model.NewVersion(version), + } + data := cache.LoadCache(dep) + if len(data) != 0 { + return ReadPom(data) + } else { + // 无本地缓存下载pom文件 + if data, err := downloadPom(dep); err == nil { + // 保存pom文件 + cache.SaveCache(dep, data) + return ReadPom(data) + } else { + logs.Warn(err) + } + } + return nil + } + \ No newline at end of file diff --git a/analyzer/python/analyzer.go b/analyzer/python/analyzer.go index 047d3528..807dacbb 100644 --- a/analyzer/python/analyzer.go +++ b/analyzer/python/analyzer.go @@ -22,7 +22,9 @@ func (Analyzer) GetLanguage() language.Type { func (Analyzer) CheckFile(filename string) bool { return filter.PythonSetup(filename) || filter.PythonPipfile(filename) || - filter.PythonPipfileLock(filename) + filter.PythonPipfileLock(filename) || + filter.PythonRequirementsTxt(filename) || + 
filter.PythonRequirementsIn(filename) } // ParseFiles parse dependency from file @@ -37,6 +39,8 @@ func (Analyzer) ParseFiles(files []*model.FileInfo) []*model.DepTree { parsePipfile(dep, f) } else if filter.PythonPipfileLock(f.Name) { parsePipfileLock(dep, f) + } else if filter.PythonRequirementsTxt(f.Name) || filter.PythonRequirementsIn(f.Name) { + parseRequirementsin(dep, f) } deps = append(deps, dep) } diff --git a/analyzer/python/pipfile.go b/analyzer/python/pipfile.go index 71575227..a2b3a5b7 100644 --- a/analyzer/python/pipfile.go +++ b/analyzer/python/pipfile.go @@ -2,7 +2,6 @@ package python import ( "encoding/json" - "strings" "util/logs" "util/model" @@ -21,12 +20,12 @@ func parsePipfile(root *model.DepTree, file *model.FileInfo) { for name, version := range pip.Packages { dep := model.NewDepTree(root) dep.Name = name - dep.Version = model.NewVersion(formatVer(version)) + dep.Version = model.NewVersion(version) } for name, version := range pip.DevPackages { dep := model.NewDepTree(root) dep.Name = name - dep.Version = model.NewVersion(formatVer(version)) + dep.Version = model.NewVersion(version) } } @@ -50,17 +49,7 @@ func parsePipfileLock(root *model.DepTree, file *model.FileInfo) { if v != "" { dep := model.NewDepTree(root) dep.Name = n - dep.Version = model.NewVersion(formatVer(v)) + dep.Version = model.NewVersion(v) } } - return -} - -// 后续使用其他办法确定版本号 -func formatVer(v string) string { - res := strings.ReplaceAll(v, "==", "") - res = strings.ReplaceAll(res, "~=", "") - res = strings.ReplaceAll(res, ">=", "") - res = strings.ReplaceAll(res, "<=", "") - return res } diff --git a/analyzer/python/req.go b/analyzer/python/req.go new file mode 100644 index 00000000..87b7f23e --- /dev/null +++ b/analyzer/python/req.go @@ -0,0 +1,183 @@ +package python + +import ( + "fmt" + "os" + "path" + "regexp" + "sort" + "strings" + "util/bar" + "util/ex" + "util/logs" + "util/model" + "util/temp" +) + +var reg1 *regexp.Regexp +var regGit *regexp.Regexp +var replacer 
*strings.Replacer + +func init() { + reg1 = regexp.MustCompile(`^\w`) + regGit = regexp.MustCompile(`\/([\w-]+)\.git`) + replacer = strings.NewReplacer("# via","","\r",""," ","","#","") +} + +func parseRequirementsin(root *model.DepTree, file *model.FileInfo) { + // 检查python环境 + if _, err := ex.CheckPython(ex.Python); err != nil { + return + } + strArry := []string{} + temp.DoInTempDir(func(tempdir string) { + // 安装piptools + if _, err := ex.Do(ex.PipinstallPiptoos, tempdir); err != nil { + logs.Error(err) + return + } + // 删除虚拟环境 + defer ex.Do(ex.RemoveVirtualCmd, tempdir) + // 获取输出数据 + strArry = getOutData(file, tempdir) + }) + // 解析输出数据构建依赖树 + parseOutData(root, strArry) +} + +// 解析各组件所打印的信息 +func parseOutData(root *model.DepTree, strs []string) { + // 直接依赖 + directMap := map[string]*model.DepTree{} + childMap := map[*model.DepTree]map[string]struct{}{} + for _, str := range strs { + lines := strings.Split(str, "\n") + for i, line := range lines { + if reg1.MatchString(line) { + lines = lines[i:] + break + } + } + // parentsMap一个组件名对应其所有父组件名 + var parentsMap = make(map[string][]string) + cur := model.NewDepTree(nil) + nodes := []string{} + depMap := map[string]*model.DepTree{} + for _, line := range lines { + if strings.Contains(line, "==") { + // 在输出内容"=="符号左右对应名字与版本号 + cur = model.NewDepTree(nil) + line = strings.TrimSuffix(line, "\r") + nv := strings.Split(line, `==`) + if len(nv) == 2 { + cur.Name = strings.TrimSpace(nv[0]) + cur.Version = model.NewVersion(strings.TrimSpace(nv[1])) + depMap[cur.Name] = cur + m := make(map[string]struct{}) + childMap[cur] = m + nodes = append(nodes, cur.Name) + } + } else if strings.Contains(line, "#") { + // "#"符号后有父组件名字信息 + line = replacer.Replace(line) + if line == "" { + continue + } + parentsMap[cur.Name] = append(parentsMap[cur.Name], line) + } + } + depMap[cur.Name] = cur + nodes = append(nodes, cur.Name) + for _, name := range nodes { + if _,ok := depMap[name]; !ok { + continue + } + parNames := parentsMap[name] + for 
_, parName := range parNames { + if len(parNames) == 1 && strings.Contains(parName, "requirements") { + if dep, ok := depMap[name]; ok { + directMap[dep.Name] = dep + } + } + if _,ok := depMap[parName]; !ok { + continue + } + parent := depMap[parName] + dep := depMap[name] + if m,ok := childMap[dep]; ok { + if _,ok := m[dep.Name];ok { + continue + } + m[dep.Name] = struct{}{} + } + parent.Children = append(parent.Children, dep) + dep.Parent = parent + } + } + } + withRoot(root,directMap) +} + +// 所有直接依赖连接至root +func withRoot(root *model.DepTree,directMap map[string]*model.DepTree) { + direct := []*model.DepTree{} + for _, n := range directMap { + direct = append(direct, n) + } + sort.Slice(direct, func(i, j int) bool { + return direct[i].Name < direct[j].Name + }) + for _, d := range direct { + root.Children = append(root.Children, d) + d.Parent = root + } +} + +// 获取打印数据 +func getOutData(file *model.FileInfo, dir string) []string { + s := string(file.Data) + strList := []string{} + reqpath := path.Join(dir, `requirements.in`) + out, err := os.Create(reqpath) + if err != nil { + logs.Error(err) + return strList + } + out.Close() + for _, v := range strings.Split(s, "\n") { + // 少部分情况会有git连接 + if regGit.MatchString(v) { + res := regGit.FindStringSubmatch(v) + if len(res) == 2 { + bar.PipCompile.Add(1) + strList = append(strList, getSingleModStr(reqpath, res[1])) + continue + } + } + // 一般情况下字母开头的行内容都是组件名 + if reg1.MatchString(v) { + bar.PipCompile.Add(1) + strList = append(strList, getSingleModStr(reqpath, v)) + } + } + return strList +} + +// 将组件名与版本号写入requirements.in文件单独调用pip-compile,获取打印数据 +func getSingleModStr(reqpath string, elem string) string { + f, err := os.OpenFile(reqpath, os.O_CREATE, 0744) + if err != nil { + return "" + } + f.Seek(0, 0) + f.Truncate(0) + f.WriteString(elem) + f.Close() + if str, err := ex.Do(ex.PipCompilein, path.Dir(reqpath)); err != nil { + logs.Error(err) + logs.Error(fmt.Errorf("get info err:%s", elem)) + return "" + } else { + 
return str + } +} \ No newline at end of file diff --git a/analyzer/python/setup.go b/analyzer/python/setup.go index 4f79b561..e48dc73f 100644 --- a/analyzer/python/setup.go +++ b/analyzer/python/setup.go @@ -56,7 +56,7 @@ func parseSetup(root *model.DepTree, file *model.FileInfo) { logs.Warn(err) } root.Name = dep.Name - root.Version = model.NewVersion(formatVer(dep.Version)) + root.Version = model.NewVersion(dep.Version) root.Licenses = append(root.Licenses, dep.License) for _, pkg := range [][]string{dep.Packages, dep.InstallRequires, dep.Requires} { for _, p := range pkg { @@ -64,12 +64,11 @@ func parseSetup(root *model.DepTree, file *model.FileInfo) { sub := model.NewDepTree(root) if index > -1 { sub.Name = p[:index] - sub.Version = model.NewVersion(formatVer(p[index:])) + sub.Version = model.NewVersion(p[index:]) } else { sub.Name = p } } } }) - return } diff --git a/config.json b/config.json index 63fc6672..aae66d54 100644 --- a/config.json +++ b/config.json @@ -7,5 +7,12 @@ "cache": true, "vuln": false, "progress": true, - "dedup": true -} \ No newline at end of file + "dedup": true, + "maven":[ + { + "repo":"", + "user":"", + "password":"" + } + ] +} diff --git a/util/args/args.go b/util/args/args.go index d0f54063..1348a64a 100644 --- a/util/args/args.go +++ b/util/args/args.go @@ -30,9 +30,17 @@ var ( Token string `json:"token"` // local vuldb VulnDB string `json:"db"` + // prvate repository + Maven []repos `json:"maven"` }{} ) +type repos struct { + Repo string `json:"repo"` + User string `json:"user"` + Password string `json:"password"` +} + func init() { flag.StringVar(&ConfigPath, "config", "", "(可选) 指定配置文件路径,指定后启动程序时将默认使用配置参数,配置参数与命令行输入参数冲突时优先使用输入参数") flag.StringVar(&Config.Path, "path", Config.Path, "(必须) 指定要检测的文件或目录路径,例: -path ./foo 或 -path ./foo.zip") diff --git a/util/bar/bar.go b/util/bar/bar.go index 9085e4ee..13df825b 100644 --- a/util/bar/bar.go +++ b/util/bar/bar.go @@ -18,6 +18,7 @@ var ( Npm *Bar = newBar("parse npm indirect dependency") 
// CmdOpts describes an external command: the executable name, its
// arguments and the directory it is run in.
type CmdOpts struct {
	Name string
	Args []string
	Dir  string
}

// GetCmdOpts splits the command line c on whitespace into an executable name
// and its arguments, and records dir as the working directory.
//
// A command consisting of a single word is valid and yields a CmdOpts with
// no arguments; previously it was discarded along with dir. An empty or
// whitespace-only command still yields a zero-valued CmdOpts.
func GetCmdOpts(c string, dir string) *CmdOpts {
	list := strings.Fields(c)
	if len(list) == 0 {
		return &CmdOpts{}
	}
	return &CmdOpts{
		Name: list[0],
		Args: list[1:],
		Dir:  dir,
	}
}
+ ec.Dir = c.Dir + return +} + +// 执行 +func Excute(cmd *exec.Cmd) (s string, err error) { + stdoutStderr, err := cmd.CombinedOutput() + if err != nil { + logs.Error(err) + return + } + s = Convert(string(stdoutStderr), "gbk", "utf-8") + return +} + +// 编码转换 +func Convert(s string, source string, target string) string { + srcCoder := mahonia.NewDecoder(source) + res := srcCoder.ConvertString(s) + t := mahonia.NewDecoder(target) + _, cdata, _ := t.Translate([]byte(res), true) + result := string(cdata) + return result +} diff --git a/util/filter/file.go b/util/filter/file.go index 5c0c9aa0..51608361 100644 --- a/util/filter/file.go +++ b/util/filter/file.go @@ -91,10 +91,15 @@ var ( // python var ( - PythonSetup = filterFunc(strings.HasSuffix, "setup.py") - PythonPipfile = filterFunc(strings.HasSuffix, "Pipfile") - PythonPipfileLock = filterFunc(strings.HasSuffix, "Pipfile.lock") - PythonRequirements = filterFunc(strings.HasSuffix, "requirements.txt") + PythonSetup = filterFunc(strings.HasSuffix, "setup.py") + PythonPipfile = filterFunc(strings.HasSuffix, "Pipfile") + PythonPipfileLock = filterFunc(strings.HasSuffix, "Pipfile.lock") + PythonRequirementsTxt = func(filename string) bool { + return filterFunc(strings.HasSuffix, ".txt")(filename) && + filterFunc(strings.Contains, "requirements")(path.Base(filename)) && !filterFunc(strings.Contains, "test")(path.Base(filename)) + } + PythonRequirementsIn = filterFunc(strings.HasSuffix, "requirements.in") + // PythonSetupCfg = filterFunc(strings.HasSuffix, "setup.cfg") ) // 用于筛选可能有copyright信息的文件 diff --git a/util/model/version.go b/util/model/version.go index 01df4e2a..b92ed546 100644 --- a/util/model/version.go +++ b/util/model/version.go @@ -3,156 +3,312 @@ * @Date: 2021-11-03 16:03:06 */ -package model + package model -import ( - "strconv" - "strings" -) - -// Version 组件依赖版本号 -type Version struct { - Org string `json:"org"` - Nums []int `json:"nums,omitempty"` - Suffix string `json:"suffix,omitempty"` -} - -// weight 
获取当前版本的后缀权重 -func (ver *Version) weight() (weight int) { - if len(ver.Suffix) > 0 { - // 后缀权重 - suffixs := map[string]int{"alpha": 1, "beta": 2, "milestone": 3, "rc": 4, "cr": 4, "snapshot": 5, "release": 6, "final": 6, "ga": 6, "sp": 7} - if w, ok := suffixs[ver.Suffix]; ok { - // 后缀在后缀列表中取对应后缀权重 - weight = w - } else { - // 后缀不在后缀列表中 - weight = 8 - } - } else { - // 不存在后缀 - weight = 6 - } - return weight -} - -// NewVersion 解析版本字符串 -func NewVersion(verStr string) *Version { - verStr = strings.TrimSpace(verStr) - ver := &Version{Nums: []int{}, Org: verStr} - verStr = strings.TrimLeft(verStr, "vV^~=<>") - // 获取后缀 - index := strings.Index(verStr, "-") - if index != -1 { - ver.Suffix = verStr[index+1:] - verStr = verStr[:index] - } - // 解析版本号 - tags := strings.Split(verStr, ".") - for i, numStr := range tags { - if num, err := strconv.Atoi(numStr); err == nil { - ver.Nums = append(ver.Nums, num) - } else { - ver.Suffix = strings.Join(tags[i:], ".") - break - } - } - // 去除结尾零值 - for len(ver.Nums) > 1 { - length := len(ver.Nums) - if ver.Nums[length-1] == 0 { - ver.Nums = ver.Nums[:length-1] - } else { - break - } - } - return ver -} - -// Less 判断是否严格小于另一个版本号 -func (ver *Version) Less(other *Version) bool { - length := len(ver.Nums) - if length > len(other.Nums) { - length = len(other.Nums) - } - // 比较数字大小 - for i := 0; i < length; i++ { - if ver.Nums[i] < other.Nums[i] { - return true - } else if ver.Nums[i] > other.Nums[i] { - return false - } - } - // 数字多时查看是否有非零值 - if len(ver.Nums) < len(other.Nums) { - for i := len(other.Nums) - 1; i >= len(ver.Nums); i-- { - if other.Nums[i] != 0 { - return true - } - } - } - // 比较后缀 - vw, ow := ver.weight(), other.weight() - if vw == ow { - return ver.Suffix < other.Suffix - } else { - return vw < ow - } -} - -// Equal 判断是否等于另一个版本号 -func (ver *Version) Equal(other *Version) bool { - if len(ver.Nums) != len(other.Nums) { - return false - } - // 比较数字大小 - for i, n := range ver.Nums { - if other.Nums[i] != n { - return false - } - } 
// Version is a parsed dependency version.
type Version struct {
	// Org is the original version string with surrounding whitespace removed.
	Org string `json:"org"`
	// Nums holds the leading dot-separated numeric components.
	Nums []int `json:"nums,omitempty"`
	// Suffix is the text after the first "-", or the first non-numeric component onward.
	Suffix string `json:"suffix,omitempty"`
}

// token is one component of a version string as produced by parseToken.
type token struct {
	// link is the separator that preceded the token: true for "-", false for ".".
	link bool
	// num is the numeric value of the token.
	num int
	// str is the qualifier text of the token.
	str string
	// isnum marks the token as numeric.
	isnum bool
}

var (
	// suffixs maps known qualifiers to their ordering weight.
	suffixs = map[string]int{"alpha": 1, "beta": 2, "milestone": 3, "rc": 4, "cr": 4, "snapshot": 5, "release": 6, "final": 6, "ga": 6, "sp": 7}
	// numStrReg matches a run of digits or a run of letters.
	numStrReg = regexp.MustCompile(`((\d+)|([a-zA-Z]+))`)
)

// compare orders t against t2 and returns a negative, zero or positive
// value. A numeric token ranks above a qualifier token. Known qualifiers
// are ordered by their weight in suffixs and rank below unknown ones, which
// are ordered lexically.
func (t token) compare(t2 token) int {
	// Numeric comparison.
	if t.isnum && !t2.isnum {
		return 1
	} else if !t.isnum && t2.isnum {
		return -1
	} else if t.isnum && t2.isnum {
		if t.num == t2.num {
			// Same number: the "." separator ranks above "-".
			if !t.link && t2.link {
				return 1
			} else if t.link && !t2.link {
				return -1
			}
			return 0
		}
		return t.num - t2.num
	}
	// Qualifier comparison.
	if t.str != t2.str {
		w, ok := suffixs[strings.ToLower(t.str)]
		w2, ok2 := suffixs[strings.ToLower(t2.str)]
		if ok && ok2 {
			return w - w2
		} else if ok && !ok2 {
			return -1
		} else if !ok && ok2 {
			return 1
		}
		if t.str > t2.str {
			return 1
		}
		return -1
	}
	// Separator comparison.
	if t.link != t2.link {
		if t.num != 0 {
			// For numbers the "." separator ranks higher.
			if !t.link {
				return 1
			}
			return -1
		}
		if t.str != "" {
			// For qualifiers the "-" separator ranks higher.
			if t.link {
				return 1
			}
			return -1
		}
	}
	return 0
}

// compareToken compares two token lists and returns a value with the sign
// of a - b. When the longer list continues with a qualifier, the shorter
// list is padded with an implicit "ga" qualifier first.
func compareToken(a, b []token) int {
	var min int
	if len(a) > len(b) {
		if a[len(b)].str != "" {
			b = append(b, token{link: true, str: "ga"})
		}
		min = len(b)
	} else if len(a) < len(b) {
		if b[len(a)].str != "" {
			a = append(a, token{link: true, str: "ga"})
		}
		min = len(a)
	} else {
		min = len(a)
	}
	// Compare the tokens pairwise.
	for i := 0; i < min; i++ {
		if r := a[i].compare(b[i]); r != 0 {
			return r
		}
	}
	// All shared tokens are equal: the longer list wins.
	return len(a) - len(b)
}

// parseToken splits a version string into tokens. The string is lowercased,
// leading "v" characters are dropped, and components are separated at "."
// and "-". A component that mixes digits and letters is split into separate
// tokens, and a single letter a/b/m followed by further text is expanded to
// alpha/beta/milestone. The qualifiers "final" and "ga" are treated as
// empty. Zero-valued tokens at the end of the list or directly before a
// qualifier are dropped, as are empty non-numeric tokens.
func parseToken(ver string) (tokens []token) {
	ver = strings.ToLower(strings.TrimLeft(ver, "vV"))
	tokens = []token{}
	t := token{isnum: true}
	for len(ver) > 0 {
		// Find the end of the current word (up to the next separator).
		index := strings.IndexAny(ver, `.-`)
		if index == 0 {
			// The word starts with a separator: search for the next one.
			next := strings.IndexAny(ver[1:], `.-`)
			if next == -1 {
				index = len(ver)
			} else {
				// The search started at ver[1:], so shift the index by one.
				index = next + 1
			}
		}
		if index == -1 {
			index = len(ver)
		}
		word := ver[:index]
		ver = ver[index:]
		// A leading separator closes the previous token and starts a new one.
		if word[0] == '.' || word[0] == '-' {
			tokens = append(tokens, t)
			t = token{link: word[0] == '-', isnum: word[0] == '.'}
			word = word[1:]
		}
		if n, err := strconv.Atoi(word); err == nil {
			// Pure number.
			t.num = n
			t.isnum = true
		} else if !strings.ContainsAny(word, `1234567890`) {
			// No digits at all: keep the word as a qualifier.
			t.str = word
		} else {
			// Mixed digits and letters: emit one token per run.
			link := false
			matchs := numStrReg.FindAllString(word, -1)
			for i, match := range matchs {
				if n, err := strconv.Atoi(match); err == nil {
					t.num = n
					t.isnum = true
				} else {
					// A single letter followed by more text is an abbreviation.
					if len(match) == 1 && i+1 < len(matchs) {
						switch match {
						case "a":
							match = "alpha"
						case "b":
							match = "beta"
						case "m":
							match = "milestone"
						}
					}
					t.str = match
				}
				tokens = append(tokens, t)
				t = token{link: true}
				link = true
			}
			if link {
				// The token left pending after the loop was created
				// artificially; reset its separator.
				t.link = false
			}
		}
	}
	tokens = append(tokens, t)
	// Normalize qualifiers.
	for i := range tokens {
		if tokens[i].str != "" {
			s := tokens[i].str
			if s == "final" || s == "ga" {
				s = ""
			}
			tokens[i].str = s
			tokens[i].isnum = false
		}
	}
	// Drop insignificant zero/empty tokens, scanning from the end.
	isZero := true
	for i := len(tokens) - 1; i >= 0; i-- {
		t := tokens[i]
		if t.num == 0 {
			if t.str == "" {
				if isZero || !t.isnum {
					tokens = append(tokens[:i], tokens[i+1:]...)
				}
			} else {
				isZero = true
			}
		} else {
			isZero = false
		}
	}
	return
}

// NewVersion parses a version string. Leading "v", "V", "^", "~", "=", "<"
// and ">" characters are ignored when extracting Nums and Suffix; Org keeps
// the trimmed input unchanged. Trailing zero components are removed from
// Nums while more than one component remains.
func NewVersion(verStr string) *Version {
	verStr = strings.TrimSpace(verStr)
	ver := &Version{Nums: []int{}, Org: verStr}
	verStr = strings.TrimLeft(verStr, "vV^~=<>")
	// Extract the suffix.
	index := strings.Index(verStr, "-")
	if index != -1 {
		ver.Suffix = verStr[index+1:]
		verStr = verStr[:index]
	}
	// Parse the numeric components.
	tags := strings.Split(verStr, ".")
	for i, numStr := range tags {
		num, err := strconv.Atoi(numStr)
		if err != nil {
			ver.Suffix = strings.Join(tags[i:], ".")
			break
		}
		ver.Nums = append(ver.Nums, num)
	}
	// Remove trailing zeros.
	for len(ver.Nums) > 1 {
		length := len(ver.Nums)
		if ver.Nums[length-1] != 0 {
			break
		}
		ver.Nums = ver.Nums[:length-1]
	}
	return ver
}

// Less reports whether ver is strictly lower than other.
func (ver *Version) Less(other *Version) bool {
	va := strings.TrimLeft(ver.Org, "vV^<>=~!, ")
	vb := strings.TrimLeft(other.Org, "vV^<>=~!, ")
	ta := parseToken(va)
	tb := parseToken(vb)
	return compareToken(ta, tb) < 0
}

// Equal reports whether ver equals other. Versions with a different number
// of numeric components are never equal.
func (ver *Version) Equal(other *Version) bool {
	if len(ver.Nums) != len(other.Nums) {
		return false
	}
	va := strings.TrimLeft(ver.Org, "vV^<>=~!, ")
	vb := strings.TrimLeft(other.Org, "vV^<>=~!, ")
	ta := parseToken(va)
	tb := parseToken(vb)
	return compareToken(ta, tb) == 0
}

// InRangeInterval reports whether ver lies inside any of the intervals in
// interval. Intervals are separated by "||" and written as
// "[left,right]", with "[" / "]" marking a closed bound and any other
// character an open one. An empty right value means no upper bound.
// Segments that are too short or contain no comma are skipped.
func InRangeInterval(ver *Version, interval string) bool {
	for _, interval := range strings.Split(interval, "||") {
		if len(interval) < 2 {
			continue
		}
		// Whether the left / right bound is closed.
		left := interval[0] == '['
		right := interval[len(interval)-1] == ']'
		// Position of the comma separating the two bounds.
		index := strings.Index(interval, ",")
		if index == -1 {
			// Malformed segment: skip it instead of rejecting the whole
			// expression, so that later segments are still evaluated.
			continue
		}
		leftValue := NewVersion(interval[1:index])
		rightValue := NewVersion(interval[index+1 : len(interval)-1])
		// On a closed bound.
		if (left && ver.Equal(leftValue)) || (right && ver.Equal(rightValue)) {
			return true
		}
		// Strictly inside: leftValue < ver && (right is empty || ver < rightValue).
		if leftValue.Less(ver) && (len(rightValue.Nums) == 0 || ver.Less(rightValue)) {
			return true
		}
	}
	// Not inside any interval.
	return false
}

// Ok reports whether the version looks valid: it contains no "$" and has at
// least one numeric component.
func (v *Version) Ok() bool {
	return !strings.Contains(v.Org, "$") && len(v.Nums) > 0
}