Skip to content

Commit

Permalink
csfilter-kfp: script to filter known false positives
Browse files Browse the repository at this point in the history
  • Loading branch information
kdudka committed Aug 27, 2024
1 parent a6c08ed commit 31ab32c
Show file tree
Hide file tree
Showing 8 changed files with 265 additions and 3 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ cmake_minimum_required(VERSION 3.15)
project(csdiff CXX)
enable_testing()

# C/C++ sources
# source code
add_subdirectory(src)

# regression tests
Expand Down
1 change: 1 addition & 0 deletions make-srpm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ make version.cc
%doc README
%license COPYING
%{_bindir}/csdiff
%{_bindir}/csfilter-kfp
%{_bindir}/csgrep
%{_bindir}/cshtml
%{_bindir}/cslinker
Expand Down
5 changes: 5 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ install(TARGETS
cstrans-df-run
DESTINATION ${CMAKE_INSTALL_BINDIR})

# csfilter-kfp is a ready-to-run script rather than a built target, so it is
# installed with the PROGRAMS form next to the compiled tools
install(PROGRAMS csfilter-kfp DESTINATION ${CMAKE_INSTALL_BINDIR})

# optionally build statically linked csgrep-static
option(CSGREP_STATIC "Set to ON to build the csgrep-static executable" OFF)
if(CSGREP_STATIC)
Expand Down
225 changes: 225 additions & 0 deletions src/csfilter-kfp
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
#!/usr/bin/env python3

# Copyright (C) 2024 Red Hat, Inc.
#
# This file is part of csdiff.
#
# csdiff is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# csdiff is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csdiff. If not, see <http://www.gnu.org/licenses/>.

import argparse
import os
import re
import subprocess
import sys


# if neither --kfp-dir nor --kfp-git-url is specified, use the known-false-positives RPM package
DEFAULT_KFP_DIR = "/usr/share/csmock/known-false-positives.d"
DEFAULT_KFP_JSON = "/usr/share/csmock/known-false-positives.js"


def construct_init_cmd(args):
    """Return the bash preamble that every generated filtering script starts with.

    The preamble sets the shell options, creates the temporary directory
    exported as ``${td}`` and registers its removal on shell exit.  When
    ``args.verbose`` is true, XTRACE mode is switched on as the last step.
    """
    preamble = [
        # abort the script as soon as a command fails
        'set -e',
        # propagate the exit code of piped commands
        'set -o pipefail',
        # expand globs without a match to nothing
        'shopt -s nullglob',
        # temporary directory that is removed again when the shell exits
        'export td=$(mktemp -d /tmp/tmp-csfilter-kfp.XXXXXXXXXX)',
        'trap "rm -fr \'${td}\'" EXIT',
    ]

    if args.verbose:
        # trace every command executed from here on
        preamble.append('set -x')

    return ''.join(f'{line}\n' for line in preamble)


def construct_git_cmd(kfp_git_url):
    """Return shell commands that clone the KFP git repository into ``${td}/kfp``.

    ``kfp_git_url`` may carry a revision after ``#`` (lowercase hex digits).
    With a revision, the repository is cloned in full and hard-reset to that
    revision; without one, a shallow clone of the default branch is made.
    """
    parsed = re.match("^(.*)#([0-9a-f]+)", kfp_git_url)
    if parsed is None:
        # no revision given: a shallow clone of the default branch is enough
        return f"git clone --depth 1 {kfp_git_url} ${{td}}/kfp\n"

    # a specific revision was requested: full clone, then move to that revision
    clone_url, revision = parsed.groups()
    steps = (
        f"git clone {clone_url} ${{td}}/kfp",
        f"(cd ${{td}}/kfp && git reset -q --hard {revision})",
    )
    return "".join(step + "\n" for step in steps)


def construct_prep_cmd(args):
    """Return shell commands that make the KFP data available under ``${td}``.

    The source of known false positives is chosen in this order:

    1. ``args.kfp_git_url`` -- clone the git repository into ``${td}/kfp``
    2. ``args.kfp_dir`` -- symlink the directory as ``${td}/kfp``
    3. the system-wide installation -- used when ``DEFAULT_KFP_JSON`` exists;
       both ``${td}/kfp`` and ``${td}/kfp.json`` become symlinks to it

    For the first two sources, the all-in-one ``${td}/kfp.json`` is generated
    by csgrep from the per-project ``*.err`` files.

    Raises:
        RuntimeError: if none of the three sources is available.
    """
    # check which KFP will be used
    have_kfp_json = False
    if args.kfp_git_url:
        # clone git repo
        cmd = construct_git_cmd(args.kfp_git_url)
    elif args.kfp_dir:
        # symlink a directory
        cmd = f'ln -s "{args.kfp_dir}" "${{td}}/kfp"\n'
    elif os.path.isfile(DEFAULT_KFP_JSON):
        # create symlinks to the known-false-positives RPM package installed on the system
        # (the original code tested an undefined name here, which raised NameError)
        cmd = f'ln -s "{DEFAULT_KFP_DIR}" "${{td}}/kfp"\n' \
              f'ln -s "{DEFAULT_KFP_JSON}" "${{td}}/kfp.json"\n'
        have_kfp_json = True
    else:
        raise RuntimeError("no source of KFP specified, please use --kfp-dir or --kfp-git-url" \
                           " (or install the known-false-positives RPM pacakge)")

    if not have_kfp_json:
        # create all-in-one kfp.json file from files in ${td}/kfp
        # (empty.err gives csgrep at least one input when the globs match nothing)
        cmd += '(cd "${td}/kfp" && touch empty.err && csgrep --mode=json --remove-duplicates'
        cmd += ' empty.err */ignore.err */true-positives-ignore.err >"${td}/kfp.json")\n'

    return cmd


def construct_path_filter(args):
    """Return the body of the ``path_filter`` shell function.

    Without ``args.project_nvr`` the filter is a plain ``cat``.  Otherwise the
    package name is derived from the NVR and the generated shell code reads
    ``${td}/kfp/<name>/exclude-paths.txt`` at run-time, joins its lines into
    one regular expression and drops matching paths with csgrep.

    Raises:
        RuntimeError: if the derived project name contains anything other
            than ASCII letters, digits, ``-`` and ``_``.
    """
    if args.project_nvr is None:
        # TODO: read project_nvr from scan properties if available
        return ' cat\n'

    # cut off the `-version-release` or `-version` suffix to obtain package name where `version` can be
    # a number optionally prefixed by `v` or a full-size SHA1 hash encoded in lowercase as, for example,
    # in `project-koku-koku-cbe5e5c3355c1e140aa1cca7377aebe09d8d8466`
    proj = re.sub("-(([v]?[0-9][^-]*)|([0-9a-f]{40}))(-[0-9][^-]*)?$", "", args.project_nvr)

    # validate the resulting project name; fullmatch() is used because `$` in
    # re.match() would also accept a name followed by a trailing newline, which
    # would then be pasted into the generated shell script
    if not re.fullmatch("[A-Za-z0-9_-]+", proj):
        raise RuntimeError(f"invalid project name: {proj}")

    # generate a script that will construct the filter at run-time
    cmd = f' ep="${{td}}/kfp/{proj}/exclude-paths.txt"\n'
    cmd += ' re=\n'
    cmd += ' while read line; do\n'
    # each line becomes one parenthesized alternative of the resulting regex
    cmd += ' re="${re}|(${line})"\n'
    # grep -s keeps a missing exclude-paths.txt silent, -v drops the matching lines
    # NOTE(review): after Python and bash unescaping the pattern appears to be
    # `^(#|\$)`, i.e. lines starting with `#` or a literal `$` -- confirm whether
    # blank lines were meant to be skipped as well
    cmd += ' done < <(grep -Esv "^(#|\\\\$)" "$ep")\n'
    cmd += ' if test -n "$re"; then\n'
    # ${re#|} strips the leading `|` left over from the first concatenation
    cmd += ' csgrep --mode=json --invert-match --path="${re#|}"\n'
    cmd += ' else\n'
    cmd += ' cat\n'
    cmd += ' fi\n'
    return cmd


def construct_filter_cmd(args):
    """Return the complete bash script that filters out known false positives.

    The script initializes the shell and ``${td}``, prepares the KFP data,
    stores the input as JSON, removes findings listed in ``${td}/kfp.json``,
    applies the path-based filter and prints the result either as plain text
    or as JSON, depending on ``args.json_output``.  If ``args.record_excluded``
    is set, the excluded findings are additionally written to that file.

    The input is read from the file given as the optional positional
    argument, or from standard input when no file was given.

    Raises:
        RuntimeError: propagated from the helpers when no KFP source is
            available or the project name is invalid.
    """
    # set shell options and create a temporary directory ${td}
    cmd = construct_init_cmd(args)

    # prepare the KFP data from the specified source
    cmd += construct_prep_cmd(args)

    # read the whole input into a JSON file
    cmd += 'csgrep --mode=json'

    # the positional argument was previously parsed but never used; argparse
    # stores it under the name given to add_argument(), so both spellings of
    # the attribute are accepted here
    input_file = getattr(args, "input_file", None) or getattr(args, "input-file", None)
    if input_file:
        # quoted the same way as the other user-supplied paths in this script
        cmd += f' "{input_file}"'
    cmd += ' > "${td}/input.json"\n'

    # define path-based filter
    path_filter = construct_path_filter(args)
    cmd += f'path_filter() {{\n{path_filter}}}\n'

    # exclude individual findings
    cmd += 'csdiff --show-internal "${td}/kfp.json" "${td}/input.json"'

    # exclude paths in the scan results
    cmd += ' | path_filter >${td}/output.json\n'

    if args.record_excluded:
        # record excluded findings to the specified file
        cmd += 'csdiff "${td}/output.json" "${td}/input.json"'
        cmd += f' >"{args.record_excluded}"\n'

    if not args.json_output:
        # export plain-text format
        cmd += 'csgrep "${td}/output.json"\n'
        return cmd

    # export JSON format
    cmd += 'csgrep --mode=json "${td}/output.json"'

    # optionally record the source of known-false-positives
    if args.kfp_dir:
        cmd += f' --set-scan-prop="known-false-positives-dir:{args.kfp_dir}"'
    elif args.kfp_git_url:
        cmd += f' --set-scan-prop="known-false-positives-git-url:{args.kfp_git_url}"'
    cmd += '\n'

    return cmd


def main():
    """Command-line entry point of csfilter-kfp.

    Parses the command line, constructs the filtering script and either
    prints it (``--dry-run``) or executes it with bash.  The process exits
    with the exit code of the script when the script fails.
    """
    # initialize argument parser
    parser = argparse.ArgumentParser()

    # the dest uses an underscore so that the value is reachable as
    # args.input_file; metavar keeps the name shown in --help unchanged
    parser.add_argument(
        "input_file", metavar="input-file", nargs="?",
        help="optional name of the input file (standard input is used by default)")

    # source of known-false-positives
    kfp_source = parser.add_mutually_exclusive_group()
    kfp_source.add_argument(
        "--kfp-dir",
        help="known false positives directory")
    kfp_source.add_argument(
        "--kfp-git-url",
        help="known false positives git URL (optionally taking a revision delimited by #)")

    parser.add_argument(
        "--project-nvr",
        help="Name-Version-Release (NVR) of the scanned project, used to match path exclusions")

    parser.add_argument(
        "--record-excluded",
        help="file to store all excluded findings to")

    parser.add_argument(
        "--json-output", action="store_true", default=(not os.isatty(sys.stdout.fileno())),
        help="produce JSON output (default if stdout is not connected to a terminal)")

    parser.add_argument(
        "-v", "--verbose", action="store_true",
        help="run shell in XTRACE mode while executing the filtering script")

    parser.add_argument(
        "-n", "--dry-run", action="store_true",
        help="do not execute anything, only print the shell script that would be executed")

    # parse command-line arguments
    args = parser.parse_args()

    # if --kfp-dir is used, check that a directory was given
    if args.kfp_dir and not os.path.isdir(args.kfp_dir):
        parser.error(f"'{args.kfp_dir}' given to --kfp-dir is not a directory")

    # construct the command to filter
    try:
        cmd = construct_filter_cmd(args)
    except RuntimeError as e:
        # parser.error() prints the message and terminates the process
        parser.error(str(e))

    if args.dry_run:
        # print the command and exit successfully
        print(cmd)
        sys.exit(0)

    # run the command; the script uses bash-only features (`set -o pipefail`,
    # `shopt` and process substitution), so it must not be handed to /bin/sh,
    # which is what subprocess uses with shell=True
    try:
        subprocess.run(["bash", "-c", cmd], check=True)
    except FileNotFoundError:
        parser.error("bash is required to run the filtering script")
    except subprocess.CalledProcessError as e:
        sys.exit(e.returncode)


if __name__ == "__main__":
main()
5 changes: 3 additions & 2 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ set(jsfilter "sed -e 's|\"version\": \"[^\"]*\"|\"version\": \"\"|g'")

macro(add_test_wrap test_name cmd)
add_test("${test_name}" bash -c "${cmd}")
set_tests_properties(${test_name} PROPERTIES
ENVIRONMENT "PROJECT_ROOT=${CMAKE_SOURCE_DIR}")
set_tests_properties(${test_name} PROPERTIES ENVIRONMENT
"PATH=${CMAKE_BINARY_DIR}/src:$ENV{PATH};PROJECT_ROOT=${CMAKE_SOURCE_DIR}")

set_tests_properties(${test_name} PROPERTIES COST ${test_cost})
math(EXPR test_cost "${test_cost} - 1")
Expand All @@ -45,6 +45,7 @@ endmacro()
set(test_cost 1048576)

add_subdirectory(csdiff)
add_subdirectory(csfilter-kfp)
add_subdirectory(csgrep)
add_subdirectory(cshtml)
add_subdirectory(cslinker)
Expand Down
Empty file.
Empty file.
30 changes: 30 additions & 0 deletions tests/csfilter-kfp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (C) 2024 Red Hat, Inc.
#
# This file is part of csdiff.
#
# csdiff is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# csdiff is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csdiff. If not, see <http://www.gnu.org/licenses/>.

# a generic template for csfilter-kfp tests
#
# test_csfilter_kfp(<tst>) registers the test "csfilter-kfp-<tst>", which runs
# src/csfilter-kfp with the arguments read from <tst>-args.txt, feeds it
# <tst>-stdin.txt and compares its output with <tst>-stdout.txt using
# ${diffcmd} (expected to be defined by the including CMakeLists.txt).
#
# This is a macro on purpose: it forwards to add_test_wrap(), which itself is
# a macro that updates a variable in the scope of its caller.
macro(test_csfilter_kfp tst)
    set(test_data_prefix "${CMAKE_CURRENT_SOURCE_DIR}/${tst}")

    # arguments are read at configure time and joined into a single line
    file(READ ${test_data_prefix}-args.txt args)
    string(REPLACE "\n" "" args "${args}")

    # assemble the shell pipeline: run the script, then diff its output
    set(cmd "${CMAKE_SOURCE_DIR}/src/csfilter-kfp")
    string(APPEND cmd " ${args} <${test_data_prefix}-stdin.txt")
    string(APPEND cmd " | ${diffcmd} ${test_data_prefix}-stdout.txt -")

    add_test_wrap("csfilter-kfp-${tst}" "${cmd}")
endmacro()

# csfilter-kfp tests
test_csfilter_kfp(0001)

0 comments on commit 31ab32c

Please sign in to comment.