Skip to content

Commit

Permalink
parser-json-sarif: expand relative paths coming from GCC
Browse files Browse the repository at this point in the history
Resolves: #209
Closes: #210
  • Loading branch information
kdudka committed Oct 3, 2024
1 parent ebd81d9 commit 470aced
Show file tree
Hide file tree
Showing 5 changed files with 299 additions and 0 deletions.
45 changes: 45 additions & 0 deletions src/lib/parser-json-sarif.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ struct SarifTreeDecoder::Private {
void readToolInfo(TScanProps *pScanProps, const pt::ptree *toolNode);

std::string singleChecker = "UNKNOWN_SARIF_WARNING";
std::string pwd;
const RE reFileUrl = RE("^file://");
const RE reCwe = RE("^CWE-([0-9]+)$");
const RE reVersion = RE("^([0-9][0-9.]+).*$");
const RE reRuleId =
Expand Down Expand Up @@ -156,6 +158,22 @@ void SarifTreeDecoder::readScanProps(
const pt::ptree *toolNode;
if (findChildOf(&toolNode, run0, "tool"))
d->readToolInfo(pDst, toolNode);

// read PWD so that we can reconstruct absolute paths later on
const pt::ptree *uriBase, *pwdNode, *uriNode;
if (findChildOf(&uriBase, run0, "originalUriBaseIds")
&& findChildOf(&pwdNode, *uriBase, "PWD")
&& findChildOf(&uriNode, *pwdNode, "uri"))
{
// remove the "file://" prefix
const auto &pwd = uriNode->data();
d->pwd = boost::regex_replace(pwd, d->reFileUrl, "");
// FIXME: Should we check whether d->pwd begins with '/'?

// make sure that d->pwd ends with '/'
if (!d->pwd.empty() && *d->pwd.rbegin() != '/')
d->pwd += '/';
}
}

void SarifTreeDecoder::readRoot(const pt::ptree *runs)
Expand Down Expand Up @@ -321,6 +339,32 @@ static int sarifCweFromDefNode(const pt::ptree &defNode)
return 0;
}

/// Make relative file names of all events of `pDef` absolute.
///
/// `pwd` is prepended verbatim (no path separator is inserted) to every
/// non-empty event file name that starts with neither '/' (already an
/// absolute path) nor '<' (placeholders such as <unknown>).  If `pwd` is
/// empty, the defect is left untouched.
static void expandRelativePaths(Defect *pDef, const std::string &pwd)
{
    // without PWD info there is nothing we could prepend
    if (pwd.empty())
        return;

    for (DefEvent &evt : pDef->events) {
        std::string &path = evt.fileName;

        // event carries no file path at all
        if (path.empty())
            continue;

        // absolute paths and <unknown>-like placeholders stay as they are
        const char first = path.front();
        if ('/' == first || '<' == first)
            continue;

        // relative path --> put `pwd` in front of it
        path.insert(0, pwd);
    }
}

bool SarifTreeDecoder::readNode(Defect *def)
{
// move the iterator after we get the current position
Expand Down Expand Up @@ -388,6 +432,7 @@ bool SarifTreeDecoder::readNode(Defect *def)
if (findChildOf(&relatedLocs, defNode, "relatedLocations"))
sarifReadComments(def, *relatedLocs);

expandRelativePaths(def, d->pwd);
d->digger.inferLangFromChecker(def);
d->digger.inferToolFromChecker(def);

Expand Down
1 change: 1 addition & 0 deletions tests/csgrep/0129-sarif-gcc-pwd-args.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--mode=json
209 changes: 209 additions & 0 deletions tests/csgrep/0129-sarif-gcc-pwd-stdin.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
{
"$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
"version": "2.1.0",
"runs": [
{
"tool": {
"driver": {
"name": "GNU C17",
"fullName": "GNU C17 (GCC) version 14.2.1 20240912 (Red Hat 14.2.1-4) (x86_64-redhat-linux)",
"version": "14.2.1 20240912 (Red Hat 14.2.1-4)",
"informationUri": "https://gcc.gnu.org/gcc-14/",
"rules": [
{
"id": "-Wanalyzer-null-dereference",
"helpUri": "https://gcc.gnu.org/onlinedocs/gcc-14.2.0/gcc/Static-Analyzer-Options.html#index-Wanalyzer-null-dereference"
}
]
}
},
"taxonomies": [
{
"name": "CWE",
"version": "4.7",
"organization": "MITRE",
"shortDescription": {
"text": "The MITRE Common Weakness Enumeration"
},
"taxa": [
{
"id": "476",
"helpUri": "https://cwe.mitre.org/data/definitions/476.html"
}
]
}
],
"invocations": [
{
"executionSuccessful": true,
"toolExecutionNotifications": []
}
],
"originalUriBaseIds": {
"PWD": {
"uri": "file:///home/kdudka/"
}
},
"artifacts": [
{
"location": {
"uri": "xxx.c",
"uriBaseId": "PWD"
},
"contents": {
"text": "int main()\n{\n int *p = 0;\n return *p;\n}\n"
},
"sourceLanguage": "c"
}
],
"results": [
{
"ruleId": "-Wanalyzer-null-dereference",
"taxa": [
{
"id": "476",
"toolComponent": {
"name": "cwe"
}
}
],
"properties": {
"gcc/analyzer/saved_diagnostic/sm": "malloc",
"gcc/analyzer/saved_diagnostic/enode": 4,
"gcc/analyzer/saved_diagnostic/snode": 1,
"gcc/analyzer/saved_diagnostic/sval": "(int *)0B",
"gcc/analyzer/saved_diagnostic/state": "null",
"gcc/analyzer/saved_diagnostic/idx": 0
},
"level": "warning",
"message": {
"text": "dereference of NULL \u2018p\u2019"
},
"locations": [
{
"physicalLocation": {
"artifactLocation": {
"uri": "xxx.c",
"uriBaseId": "PWD"
},
"region": {
"startLine": 4,
"startColumn": 12,
"endColumn": 14
},
"contextRegion": {
"startLine": 4,
"snippet": {
"text": " return *p;\n"
}
}
},
"logicalLocations": [
{
"name": "main",
"fullyQualifiedName": "main",
"decoratedName": "main",
"kind": "function"
}
]
}
],
"codeFlows": [
{
"threadFlows": [
{
"id": "main",
"locations": [
{
"properties": {
"gcc/analyzer/checker_event/emission_id": "(1)",
"gcc/analyzer/checker_event/kind": "EK_STATE_CHANGE"
},
"location": {
"physicalLocation": {
"artifactLocation": {
"uri": "xxx.c",
"uriBaseId": "PWD"
},
"region": {
"startLine": 3,
"startColumn": 10,
"endColumn": 11
},
"contextRegion": {
"startLine": 3,
"snippet": {
"text": " int *p = 0;\n"
}
}
},
"logicalLocations": [
{
"name": "main",
"fullyQualifiedName": "main",
"decoratedName": "main",
"kind": "function"
}
],
"message": {
"text": "\u2018p\u2019 is NULL"
}
},
"kinds": [
"release",
"memory"
],
"nestingLevel": 1,
"executionOrder": 1
},
{
"properties": {
"gcc/analyzer/checker_event/emission_id": "(2)",
"gcc/analyzer/checker_event/kind": "EK_WARNING"
},
"location": {
"physicalLocation": {
"artifactLocation": {
"uri": "xxx.c",
"uriBaseId": "PWD"
},
"region": {
"startLine": 4,
"startColumn": 12,
"endColumn": 14
},
"contextRegion": {
"startLine": 4,
"snippet": {
"text": " return *p;\n"
}
}
},
"logicalLocations": [
{
"name": "main",
"fullyQualifiedName": "main",
"decoratedName": "main",
"kind": "function"
}
],
"message": {
"text": "dereference of NULL \u2018p\u2019"
}
},
"kinds": [
"danger"
],
"nestingLevel": 1,
"executionOrder": 2
}
]
}
]
}
]
}
]
}
]
}
43 changes: 43 additions & 0 deletions tests/csgrep/0129-sarif-gcc-pwd-stdout.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"scan": {
"analyzer-version-gcc": "14.2.1"
},
"defects": [
{
"checker": "GCC_ANALYZER_WARNING",
"cwe": 476,
"language": "c/c++",
"tool": "gcc-analyzer",
"key_event_idx": 0,
"events": [
{
"file_name": "/home/kdudka/xxx.c",
"line": 4,
"column": 12,
"h_size": 2,
"event": "warning[-Wanalyzer-null-dereference]",
"message": "dereference of NULL ‘p’",
"verbosity_level": 0
},
{
"file_name": "/home/kdudka/xxx.c",
"line": 3,
"column": 10,
"h_size": 1,
"event": "release_memory",
"message": "‘p’ is NULL",
"verbosity_level": 1
},
{
"file_name": "/home/kdudka/xxx.c",
"line": 4,
"column": 12,
"h_size": 2,
"event": "danger",
"message": "dereference of NULL ‘p’",
"verbosity_level": 1
}
]
}
]
}
1 change: 1 addition & 0 deletions tests/csgrep/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,4 @@ test_csgrep("0125-sarif-parser-bom" )
test_csgrep("0126-cov-parser-imp-flag" )
test_csgrep("0127-cov-writer-noloc" )
test_csgrep("0128-cov-parser-noloc" )
test_csgrep("0129-sarif-gcc-pwd" )

0 comments on commit 470aced

Please sign in to comment.