From 290f189931ced305cb0deb40f27e815a43c2d709 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sat, 17 Dec 2022 17:50:08 -0800 Subject: [PATCH] #Centipede Add dominator collection to control_flow. PiperOrigin-RevId: 496142092 --- BUILD | 28 ++++-- analyze_corpora.cc | 2 +- analyze_corpora.h | 2 +- call_graph.cc | 6 +- call_graph.h | 5 +- call_graph_test.cc | 15 ++- centipede.cc | 6 +- centipede.h | 8 +- centipede_callbacks.cc | 9 +- centipede_callbacks.h | 4 +- centipede_interface.cc | 4 +- control_flow.cc | 173 +++++++++++++++++++++++++++++++++-- control_flow.h | 123 ++++++++++++++++++++++++- control_flow_test.cc | 193 +++++++++++++++++++++++++++++++++------ corpus.cc | 2 +- corpus.h | 8 +- corpus_test.cc | 34 +++---- coverage.cc | 109 +--------------------- coverage.h | 105 ++++----------------- symbol_table.cc | 5 +- symbol_table.h | 4 +- testing/BUILD | 1 + testing/coverage_test.cc | 118 ++---------------------- 23 files changed, 559 insertions(+), 405 deletions(-) diff --git a/BUILD b/BUILD index 5807a2f..91c9561 100644 --- a/BUILD +++ b/BUILD @@ -255,7 +255,9 @@ cc_library( ], deps = [ ":command", + ":control_flow", ":defs", + ":feature", ":logging", ":util", "@com_google_absl//absl/base:core_headers", @@ -272,14 +274,20 @@ cc_library( name = "control_flow", srcs = [ "control_flow.cc", + "symbol_table.cc", ], hdrs = [ "control_flow.h", + "symbol_table.h", ], deps = [ - ":coverage", + ":command", + ":defs", ":logging", + ":util", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/strings", ], ) @@ -294,7 +302,7 @@ cc_library( "call_graph.h", ], deps = [ - ":coverage", + ":control_flow", ":logging", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/log:check", @@ -353,7 +361,7 @@ cc_library( deps = [ ":byte_array_mutator", ":command", - ":coverage", + ":control_flow", ":defs", ":environment", ":execution_request", @@ -392,6 +400,7 @@ cc_library( 
":blob_file", ":centipede_callbacks", ":command", + ":control_flow", ":corpus", ":coverage", ":defs", @@ -810,11 +819,19 @@ cc_test( cc_test( name = "control_flow_test", srcs = ["control_flow_test.cc"], + data = [ + "@centipede///testing:test_fuzz_target", + "@centipede///testing:test_fuzz_target_trace_pc", + "@centipede///testing:threaded_fuzz_target", + ], deps = [ - "@centipede//:control_flow", - "@centipede//:coverage", + ":control_flow", + "@centipede//:defs", + "@centipede//:environment", + "@centipede//:execution_result", "@centipede//:logging", "@centipede//:test_util", + "@centipede//:util", "@com_google_googletest//:gtest_main", ], ) @@ -824,7 +841,6 @@ cc_test( srcs = ["call_graph_test.cc"], deps = [ "@centipede//:call_graph", - "@centipede//:coverage", "@centipede//:logging", "@com_google_googletest//:gtest_main", ], diff --git a/analyze_corpora.cc b/analyze_corpora.cc index 1e8d377..4c4d221 100644 --- a/analyze_corpora.cc +++ b/analyze_corpora.cc @@ -19,7 +19,7 @@ #include "./logging.h" namespace centipede { -void AnalyzeCorpora(const Coverage::PCTable &pc_table, +void AnalyzeCorpora(const PCTable &pc_table, const SymbolTable &symbols, const std::vector &a, const std::vector &b) { diff --git a/analyze_corpora.h b/analyze_corpora.h index 431a1ac..c29ef64 100644 --- a/analyze_corpora.h +++ b/analyze_corpora.h @@ -23,7 +23,7 @@ namespace centipede { // Analyzes two corpora, `a` and `b`, reports the differences. // TODO(kcc): unimplemented. 
-void AnalyzeCorpora(const Coverage::PCTable &pc_table, +void AnalyzeCorpora(const PCTable &pc_table, const SymbolTable &symbols, const std::vector &a, const std::vector &b); diff --git a/call_graph.cc b/call_graph.cc index 811cc09..8867dc8 100644 --- a/call_graph.cc +++ b/call_graph.cc @@ -20,11 +20,11 @@ namespace centipede { -void CallGraph::ReadFromCfTable(const Coverage::CFTable &cf_table, - const Coverage::PCTable &pc_table) { +void CallGraph::ReadFromCfTable(const CFTable &cf_table, + const PCTable &pc_table) { // Find all function entries. for (auto pc_info : pc_table) { - if (pc_info.has_flag(Coverage::PCInfo::kFuncEntry)) + if (pc_info.has_flag(PCInfo::kFuncEntry)) function_entries_.insert(pc_info.pc); } diff --git a/call_graph.h b/call_graph.h index 3abd134..6d10d50 100644 --- a/call_graph.h +++ b/call_graph.h @@ -22,7 +22,7 @@ #include "absl/container/flat_hash_map.h" #include "absl/log/check.h" -#include "./coverage.h" +#include "./control_flow.h" #include "./logging.h" namespace centipede { @@ -31,8 +31,7 @@ class CallGraph { public: // Reads in the CfTable from __sancov_cfs section. On error it crashes, if the // section is not available, the hash maps will be empty. - void ReadFromCfTable(const Coverage::CFTable& cf_table, - const Coverage::PCTable& pc_table); + void ReadFromCfTable(const CFTable& cf_table, const PCTable& pc_table); const std::vector& GetFunctionCallees(uintptr_t pc) const { const auto it = call_graph_.find(pc); diff --git a/call_graph_test.cc b/call_graph_test.cc index a3b75ab..c14be26 100644 --- a/call_graph_test.cc +++ b/call_graph_test.cc @@ -20,7 +20,6 @@ #include "googlemock/include/gmock/gmock.h" #include "googletest/include/gtest/gtest.h" -#include "./coverage.h" #include "./logging.h" namespace centipede { @@ -38,7 +37,7 @@ using ::testing::Contains; // \ / // \ / // 4 (7) -static const Coverage::CFTable g_cf_table = { +static const CFTable g_cf_table = { 1, 2, 3, 0, 0, // PC 1 has no callee. 
2, 4, 0, 99, 0, // PC 2 calls 99. 3, 4, 0, 6, -1, 8, 0, // PC 3 calls 6, 8, and has one indirect call. @@ -49,14 +48,14 @@ static const Coverage::CFTable g_cf_table = { }; // Mock PCTable for the above cfg. -static const Coverage::PCTable g_pc_table = { - {1, Coverage::PCInfo::kFuncEntry}, +static const PCTable g_pc_table = { + {1, PCInfo::kFuncEntry}, {2, 0}, {3, 0}, {4, 0}, - {6, Coverage::PCInfo::kFuncEntry}, - {7, Coverage::PCInfo::kFuncEntry}, - {8, Coverage::PCInfo::kFuncEntry}, + {6, PCInfo::kFuncEntry}, + {7, PCInfo::kFuncEntry}, + {8, PCInfo::kFuncEntry}, }; TEST(CallGraphDeathTest, CgNoneExistentPc) { @@ -80,7 +79,7 @@ TEST(CallGraph, BuildCgFromCfTable) { // Check callees. for (size_t i = 0; i < g_pc_table.size(); ++i) { uintptr_t pc = g_pc_table[i].pc; - if (g_pc_table[i].has_flag(Coverage::PCInfo::kFuncEntry)) + if (g_pc_table[i].has_flag(PCInfo::kFuncEntry)) EXPECT_TRUE(call_graph.IsFunctionEntry(pc)); else EXPECT_FALSE(call_graph.IsFunctionEntry(pc)); diff --git a/centipede.cc b/centipede.cc index 82296e7..25ade7b 100644 --- a/centipede.cc +++ b/centipede.cc @@ -63,6 +63,7 @@ #include "absl/synchronization/mutex.h" #include "absl/types/span.h" #include "./blob_file.h" +#include "./control_flow.h" #include "./coverage.h" #include "./defs.h" #include "./environment.h" @@ -80,8 +81,7 @@ namespace centipede { using perf::RUsageProfiler; Centipede::Centipede(const Environment &env, CentipedeCallbacks &user_callbacks, - const Coverage::PCTable &pc_table, - const SymbolTable &symbols, + const PCTable &pc_table, const SymbolTable &symbols, CoverageLogger &coverage_logger, Stats &stats) : env_(env), user_callbacks_(user_callbacks), @@ -217,7 +217,7 @@ void Centipede::LogFeaturesAsSymbols(const FeatureVec &fv) { auto feature_domain = feature_domains::k8bitCounters; for (auto feature : fv) { if (!feature_domain.Contains(feature)) continue; - Coverage::PCIndex pc_index = Convert8bitCounterFeatureToPcIndex(feature); + PCIndex pc_index = 
Convert8bitCounterFeatureToPcIndex(feature); auto description = coverage_logger_.ObserveAndDescribeIfNew(pc_index); if (description.empty()) continue; LOG(INFO) << description; diff --git a/centipede.h b/centipede.h index 0aeb952..899e4d0 100644 --- a/centipede.h +++ b/centipede.h @@ -32,6 +32,7 @@ #include "./rusage_profiler.h" #include "./stats.h" #include "./symbol_table.h" +#include "./control_flow.h" namespace centipede { @@ -39,8 +40,9 @@ namespace centipede { class Centipede { public: Centipede(const Environment &env, CentipedeCallbacks &user_callbacks, - const Coverage::PCTable &pc_table, const SymbolTable &symbols, - CoverageLogger &coverage_logger, Stats &stats); + const PCTable &pc_table, + const SymbolTable &symbols, CoverageLogger &coverage_logger, + Stats &stats); virtual ~Centipede() {} // Main loop. @@ -145,7 +147,7 @@ class Centipede { // Coverage-related data, initialized at startup, once per process, // by calling the PopulateSymbolAndPcTables callback. - const Coverage::PCTable &pc_table_; + const PCTable &pc_table_; const SymbolTable &symbols_; // Derived from env_.function_filter. Currently, duplicated by every thread. diff --git a/centipede_callbacks.cc b/centipede_callbacks.cc index 5fde8ac..49c72c9 100644 --- a/centipede_callbacks.cc +++ b/centipede_callbacks.cc @@ -24,7 +24,7 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "./command.h" -#include "./coverage.h" +#include "./control_flow.h" #include "./defs.h" #include "./execution_request.h" #include "./execution_result.h" @@ -34,16 +34,15 @@ namespace centipede { -void CentipedeCallbacks::PopulateSymbolAndPcTables( - SymbolTable &symbols, Coverage::PCTable &pc_table) { +void CentipedeCallbacks::PopulateSymbolAndPcTables(SymbolTable &symbols, + PCTable &pc_table) { // Running in main thread, create our own temp dir. 
if (!std::filesystem::exists(temp_dir_)) { CreateLocalDirRemovedAtExit(temp_dir_); } std::string pc_table_path = std::filesystem::path(temp_dir_).append("pc_table"); - pc_table = - Coverage::GetPcTableFromBinary(env_.coverage_binary, pc_table_path); + pc_table = GetPcTableFromBinary(env_.coverage_binary, pc_table_path); if (pc_table.empty()) { if (env_.require_pc_table) { LOG(INFO) << "Could not get PCTable, exiting (override with " diff --git a/centipede_callbacks.h b/centipede_callbacks.h index eb9fa3c..5b3536b 100644 --- a/centipede_callbacks.h +++ b/centipede_callbacks.h @@ -23,7 +23,7 @@ #include "./byte_array_mutator.h" #include "./command.h" -#include "./coverage.h" +#include "./control_flow.h" #include "./defs.h" #include "./environment.h" #include "./execution_result.h" @@ -72,7 +72,7 @@ class CentipedeCallbacks { // the `coverage_binary` or if symbolization fails. // Exits if PC table was not populated and `env_.require_pc_table` is set. virtual void PopulateSymbolAndPcTables(SymbolTable &symbols, - Coverage::PCTable &pc_table); + PCTable &pc_table); // Returns some simple non-empty valid input. virtual ByteArray DummyValidInput() { return {0}; } diff --git a/centipede_interface.cc b/centipede_interface.cc index 7c70d83..f013e07 100644 --- a/centipede_interface.cc +++ b/centipede_interface.cc @@ -116,7 +116,7 @@ void PrintExperimentStatsThread(const std::atomic &continue_running, // Loads corpora from work dirs provided in `env.args`, analyzes differences. // Returns EXIT_SUCCESS on success, EXIT_FAILURE otherwise. 
-int Analyze(const Environment &env, const Coverage::PCTable &pc_table, +int Analyze(const Environment &env, const PCTable &pc_table, const SymbolTable &symbols) { LOG(INFO) << "Analyze " << absl::StrJoin(env.args, ","); CHECK_EQ(env.args.size(), 2) << "for now, Analyze supports only 2 work dirs"; @@ -175,7 +175,7 @@ int CentipedeMain(const Environment &env, RemoteMkdir(env.MakeCoverageDirPath()); auto one_time_callbacks = callbacks_factory.create(env); - Coverage::PCTable pc_table; + PCTable pc_table; SymbolTable symbols; one_time_callbacks->PopulateSymbolAndPcTables(symbols, pc_table); callbacks_factory.destroy(one_time_callbacks); diff --git a/control_flow.cc b/control_flow.cc index 51598c7..ba978bc 100644 --- a/control_flow.cc +++ b/control_flow.cc @@ -14,15 +14,120 @@ #include "./control_flow.h" +#include +#include #include +#include +#include -#include "./coverage.h" +#include "./command.h" +#include "./defs.h" #include "./logging.h" +#include "./symbol_table.h" +#include "./util.h" namespace centipede { -void ControlFlowGraph::ReadFromCfTable(const Coverage::CFTable &cf_table, - const Coverage::PCTable &pc_table) { +PCTable GetPcTableFromBinaryWithTracePC(std::string_view binary_path, + std::string_view tmp_path) { + // Assumes objdump in PATH. + // Run objdump -d on the binary. + Command cmd("objdump", {"-d", std::string(binary_path)}, {}, tmp_path, + "/dev/null"); + int system_exit_code = cmd.Execute(); + if (system_exit_code) { + LOG(INFO) << __func__ << " objdump failed: " << VV(system_exit_code) + << VV(cmd.ToString()); + return PCTable(); + } + PCTable pc_table; + std::ifstream in(std::string{tmp_path}); + CHECK(in.good()) << VV(tmp_path); + bool saw_new_function = false; + + // TODO(navidem): use absl::EndsWith(). 
+ auto ends_with = [](std::string_view str, std::string_view end) -> bool { + return end.size() <= str.size() && str.substr(str.size() - end.size()) == end; + }; // Compares the real suffix; find() would only report the first match. + + // Read the objdump output, find lines that start a function + // and lines that have a call to __sanitizer_cov_trace_pc. + // Reconstruct the PCTable from those. + for (std::string line; std::getline(in, line);) { + if (ends_with(line, ">:")) { // new function. + saw_new_function = true; + continue; + } + if (!ends_with(line, "<__sanitizer_cov_trace_pc>")) continue; + uintptr_t pc = std::stoul(line, nullptr, 16); + uintptr_t flags = saw_new_function ? PCInfo::kFuncEntry : 0; + saw_new_function = false; // next trace_pc will be in the same function. + pc_table.push_back({pc, flags}); + } + std::filesystem::remove(tmp_path); + return pc_table; +} + +PCTable GetPcTableFromBinary(std::string_view binary_path, + std::string_view tmp_path) { + PCTable res = GetPcTableFromBinaryWithPcTable(binary_path, tmp_path); + if (res.empty()) { + // Fall back to trace-pc.
+ res = GetPcTableFromBinaryWithTracePC(binary_path, tmp_path); + } + return res; +} + +PCTable GetPcTableFromBinaryWithPcTable(std::string_view binary_path, + std::string_view tmp_path) { + Command cmd(binary_path, {}, + {absl::StrCat("CENTIPEDE_RUNNER_FLAGS=:dump_pc_table:arg1=", + tmp_path, ":")}, + "/dev/null", "/dev/null"); + int system_exit_code = cmd.Execute(); + if (system_exit_code) { + LOG(INFO) << "system() for " << binary_path + << " with --dump_pc_table failed: " << VV(system_exit_code); + return {}; + } + ByteArray pc_infos_as_bytes; + ReadFromLocalFile(tmp_path, pc_infos_as_bytes); + std::filesystem::remove(tmp_path); + CHECK_EQ(pc_infos_as_bytes.size() % sizeof(PCInfo), 0); + size_t pc_table_size = pc_infos_as_bytes.size() / sizeof(PCInfo); + const auto *pc_infos = reinterpret_cast(pc_infos_as_bytes.data()); + PCTable pc_table{pc_infos, pc_infos + pc_table_size}; + CHECK_EQ(pc_table.size(), pc_table_size); + return pc_table; +} + +CFTable GetCfTableFromBinary(std::string_view binary_path, + std::string_view tmp_path) { + Command cmd(binary_path, {}, + {absl::StrCat("CENTIPEDE_RUNNER_FLAGS=:dump_cf_table:arg1=", + tmp_path, ":")}, + "/dev/null", "/dev/null"); + int cmd_exit_code = cmd.Execute(); + if (cmd_exit_code != EXIT_SUCCESS) { + LOG(ERROR) << "CF table dumping failed: " << VV(cmd.ToString()) + << VV(cmd_exit_code); + return {}; + } + ByteArray cf_infos_as_bytes; + ReadFromLocalFile(tmp_path, cf_infos_as_bytes); + std::filesystem::remove(tmp_path); + + size_t cf_table_size = cf_infos_as_bytes.size() / sizeof(CFTable::value_type); + const auto *cf_infos = + reinterpret_cast(cf_infos_as_bytes.data()); + CFTable cf_table{cf_infos, cf_infos + cf_table_size}; + CHECK_EQ(cf_table.size(), cf_table_size); + return cf_table; +} + +ControlFlowGraph::ControlFlowGraph(const CFTable &cf_table, + const PCTable &pc_table) + : pc_table_(pc_table), cf_table_(cf_table), func_entries_(pc_table.size()) { for (size_t j = 0; j < cf_table.size();) { std::vector 
successors; auto curr_pc = cf_table[j]; @@ -46,12 +151,18 @@ void ControlFlowGraph::ReadFromCfTable(const Coverage::CFTable &cf_table, CHECK_LE(j, cf_table.size()); } // Calculate cyclomatic complexity for all functions. - for (Coverage::PCIndex i = 0; i < pc_table.size(); ++i) { - if (pc_table[i].has_flag(Coverage::PCInfo::kFuncEntry)) { - uintptr_t func_pc = pc_table[i].pc; - auto func_comp = ComputeFunctionCyclomaticComplexity(func_pc, *this); - function_complexities_[func_pc] = func_comp; + uintptr_t function_pc = 0; + for (PCIndex i = 0; i < pc_table.size(); ++i) { + if (pc_table[i].has_flag(PCInfo::kFuncEntry)) { + func_entries_[i] = true; + function_pc = pc_table[i].pc; + function_bb_num_[function_pc] = 0; // Initialize conservatively. + auto func_comp = ComputeFunctionCyclomaticComplexity(function_pc, *this); + function_complexities_[function_pc] = func_comp; } + entry_block_[pc_table[i].pc] = function_pc; + pc_index_map_[pc_table[i].pc] = i; + function_bb_num_[function_pc] += 1; } } @@ -62,6 +173,52 @@ const std::vector &ControlFlowGraph::GetSuccessors( return it->second; } +absl::flat_hash_set ControlFlowGraph::GetFunctionBlocks( + uintptr_t pc) { + auto pc_index = pc_index_map_[pc]; + CHECK(BlockIsFunctionEntry(pc_index)); + absl::flat_hash_set blocks; + + for (PCIndex i = 0; i < function_bb_num_[pc]; ++i) { + blocks.insert(pc_table_[pc_index + i].pc); + } + return blocks; +} + +std::vector ControlFlowGraph::GetDominates(uintptr_t pc) { + CHECK(graph_.contains(pc)); + // If it is already calculated, return it. + auto it = dominates_.find(pc); + if (it != dominates_.end()) return it->second; + + // Perform straight forward dominator calculation: for the given pc, + // traverse all paths from function_entry avoiding pc. Report all unvisited + // nodes as dominated by pc. 
+ auto entry_pc = entry_block_[pc]; + std::queue worklist; + absl::flat_hash_set all_pcs, visited_pcs; + + all_pcs = GetFunctionBlocks(entry_pc); + worklist.push(entry_pc); + while (!worklist.empty()) { + auto current_pc = worklist.front(); + worklist.pop(); + if (current_pc == pc) continue; // Avoid any path through pc. + if (!visited_pcs.insert(current_pc).second) continue; + for (auto &successor : graph_[current_pc]) { + worklist.push(successor); + } + } + + std::vector dominatees; + for (auto candidate : all_pcs) { + if (visited_pcs.contains(candidate)) continue; + dominatees.push_back(candidate); + } + dominates_[pc] = std::move(dominatees); // Store it for later usage. + return dominates_[pc]; +} + uint32_t ComputeFunctionCyclomaticComplexity(uintptr_t pc, const ControlFlowGraph &cfg) { size_t edge_num = 0, node_num = 0; diff --git a/control_flow.h b/control_flow.h index 46b8eee..35d0954 100644 --- a/control_flow.h +++ b/control_flow.h @@ -20,25 +20,94 @@ #include #include "absl/container/flat_hash_map.h" -#include "./coverage.h" +#include "absl/container/flat_hash_set.h" +#include "./logging.h" namespace centipede { +class SymbolTable; // To avoid mutual inclusion with symbol_table.h. + +// PCInfo is a pair {PC, bit mask with PC flags}. +// See https://clang.llvm.org/docs/SanitizerCoverage.html#pc-table +struct PCInfo { + enum PCFlags : uintptr_t { + kFuncEntry = 1 << 0, // The PC is the function entry block. + }; + + uintptr_t pc; + uintptr_t flags; + + bool has_flag(PCFlags f) const { return flags & f; } +}; + +// Array of PCInfo-s. +// PCTable is created by the compiler/linker in the instrumented binary. +// The order of elements is significant: each element corresponds +// to the coverage counter with the same index. +// Every PCInfo that is kFuncEntry is followed by PCInfo-s from the same +// function. +using PCTable = std::vector; + +// Reads the pc table from the binary file at `binary_path`. 
+// May create a file `tmp_path`, but will delete it afterwards. +// Currently works for +// * binaries linked with :centipede_runner +// and built with -fsanitize-coverage=pc-table, +// * binaries built with -fsanitize-coverage=trace-pc +PCTable GetPcTableFromBinary(std::string_view binary_path, + std::string_view tmp_path); + +// Helper for GetPcTableFromBinary, +// for binaries linked with :centipede_runner +// and built with -fsanitize-coverage=pc-table. +// Returns the PCTable that the binary itself reported. +// May create a file `tmp_path`, but will delete it afterwards. +PCTable GetPcTableFromBinaryWithPcTable(std::string_view binary_path, + std::string_view tmp_path); + +// Helper for GetPcTableFromBinary, +// for binaries built with -fsanitize-coverage=trace-pc. +// Returns the PCTable reconstructed from `binary_path` with `objdump -d`. +// May create a file `tmp_path`, but will delete it afterwards. +PCTable GetPcTableFromBinaryWithTracePC(std::string_view binary_path, + std::string_view tmp_path); + +// PCIndex: an index into the PCTable. +// We use 32-bit int for compactness since PCTable is never too large. +using PCIndex = uint32_t; +// A set of PCIndex-es, order is not important. +using PCIndexVec = std::vector; + +// Array of elements in __sancov_cfs section. +// CFTable is created by the compiler/linker in the instrumented binary. +// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-control-flow. +using CFTable = std::vector; + +// Reads the control-flow table from the binary file at `binary_path`. +// May create a file `tmp_path`, but will delete it afterwards. +// Currently works for +// * binaries linked with :fuzz_target_runner +// and built with -fsanitize-coverage=control-flow. +CFTable GetCfTableFromBinary(std::string_view binary_path, + std::string_view tmp_path); + class ControlFlowGraph { public: // Reads form __sancov_cfs section. On error it crashes, if the section is not // there, the graph_ will be empty. 
- void ReadFromCfTable(const Coverage::CFTable &cf_table, - const Coverage::PCTable &pc_table); + ControlFlowGraph(const CFTable &cf_table, const PCTable &pc_table); + // Returns the vector of successor PCs for the given basic block PC. const std::vector &GetSuccessors(uintptr_t basic_block) const; // Returns the number of cfg entries. size_t size() const { return graph_.size(); } + // Checks if basic_block is in cfg. bool exists(const uintptr_t basic_block) const { return graph_.contains(basic_block); } + // Returns cyclomatic complexity of function PC. CHECK-fails if it is not a // valid function PC. uint32_t GetCyclomaticComplexity(uintptr_t pc) const { @@ -47,12 +116,60 @@ class ControlFlowGraph { return it->second; } + // Returns true if the given basic block is function entry. + bool BlockIsFunctionEntry(PCIndex pc_index) const { + return func_entries_[pc_index]; + } + + // Returns the idx in pc_table associated with the PC, CHECK-fails if the PC + // is not in the pc_table. + PCIndex GetPcIndex(uintptr_t pc) const { + auto it = pc_index_map_.find(pc); + CHECK(it != pc_index_map_.end()); + return it->second; + } + + // Returns the function entry of given PC. CHECK-fails if the pc in not in + // cfg. + uintptr_t GetFunctionEntryBlock(uintptr_t pc) const { + auto it = entry_block_.find(pc); + CHECK(it != entry_block_.end()); + return it->second; + } + + // Returns all BBs in function with entry PC. + absl::flat_hash_set GetFunctionBlocks(uintptr_t pc); + + // Returns all BBs dominated by PC. + // TODO(navidem): Currently we calculate diminators on demand and store it in + // dominates_. An alternative is in CFG CTOR collect dominators. + // The current implemented approach is linear for each BB, which becomes + // quadratic for the whole function. If this is to be implemented in CTOR, we + // have to use this https://dl.acm.org/doi/pdf/10.1145/357062.357071 which is + // almost linear. 
+ // Along that we have to get a sense on how much memory is needed for storing + // N*N vector for each function. N is the number of BBs in a function. + std::vector GetDominates(uintptr_t pc); + private: + PCTable pc_table_; + CFTable cf_table_; + // Map from PC to the idx in pc_table. + absl::flat_hash_map pc_index_map_; + // A vector of size PCTable. func_entries[idx] is true iff means the PC at idx + // is a function entry. + std::vector func_entries_; // A map with PC as the keys and vector of PCs as value. absl::flat_hash_map> graph_; // A map from function PC to its calculated cyclomatic complexity. It is // to avoid unnecessary calls to ComputeFunctionCyclomaticComplexity. absl::flat_hash_map function_complexities_; + // A map from BB pc to its function entry block PC. + absl::flat_hash_map entry_block_; + // A map to maintain all BBs dominated by the key PC. + absl::flat_hash_map> dominates_; + // A map to maintain number of BBs in a function. + absl::flat_hash_map function_bb_num_; }; // Computes the Cyclomatic Complexity for the given function, diff --git a/control_flow_test.cc b/control_flow_test.cc index b8eca0c..f0a0b33 100644 --- a/control_flow_test.cc +++ b/control_flow_test.cc @@ -19,9 +19,13 @@ #include "googlemock/include/gmock/gmock.h" #include "googletest/include/gtest/gtest.h" -#include "./coverage.h" +#include "./defs.h" +#include "./environment.h" +#include "./execution_result.h" #include "./logging.h" +#include "./symbol_table.h" #include "./test_util.h" +#include "./util.h" namespace centipede { namespace { @@ -32,18 +36,13 @@ namespace { // 2 3 // \ / // 4 -static const Coverage::CFTable g_cf_table = {1, 2, 3, 0, 0, 2, 4, 0, - 0, 3, 4, 0, 0, 4, 0, 0}; -static const Coverage::PCTable g_pc_table = { - {1, Coverage::PCInfo::kFuncEntry}, - {2, 0}, - {3, 0}, - {4, 0} -}; - -TEST(ControlFlowGraph, MakeCfgFromCfTable) { - ControlFlowGraph cfg; - cfg.ReadFromCfTable(g_cf_table, g_pc_table); +static const CFTable g_cf_table = {1, 2, 3, 0, 0, 2, 
4, 0, + 0, 3, 4, 0, 0, 4, 0, 0}; +static const PCTable g_pc_table = { + {1, PCInfo::kFuncEntry}, {2, 0}, {3, 0}, {4, 0}}; + +TEST(ControlFlow, MakeCfgFromCfTable) { + ControlFlowGraph cfg(g_cf_table, g_pc_table); EXPECT_NE(cfg.size(), 0); for (auto &pc : {1, 2, 3, 4}) { @@ -62,45 +61,83 @@ TEST(ControlFlowGraph, MakeCfgFromCfTable) { EXPECT_TRUE(cfg.GetSuccessors(4).empty()); } + CHECK_EQ(cfg.GetPcIndex(1), 0); + CHECK_EQ(cfg.GetPcIndex(2), 1); + CHECK_EQ(cfg.GetPcIndex(3), 2); + CHECK_EQ(cfg.GetPcIndex(4), 3); + + EXPECT_TRUE(cfg.BlockIsFunctionEntry(0)); + EXPECT_FALSE(cfg.BlockIsFunctionEntry(1)); + EXPECT_FALSE(cfg.BlockIsFunctionEntry(2)); + EXPECT_FALSE(cfg.BlockIsFunctionEntry(3)); + + auto func_blocks = cfg.GetFunctionBlocks(1); + EXPECT_EQ(func_blocks.size(), 4); + EXPECT_TRUE(func_blocks.contains(1)); + EXPECT_TRUE(func_blocks.contains(2)); + EXPECT_TRUE(func_blocks.contains(3)); + EXPECT_TRUE(func_blocks.contains(4)); + + EXPECT_EQ(cfg.GetFunctionEntryBlock(1), 1); + EXPECT_EQ(cfg.GetFunctionEntryBlock(2), 1); + EXPECT_EQ(cfg.GetFunctionEntryBlock(3), 1); + EXPECT_EQ(cfg.GetFunctionEntryBlock(4), 1); + + auto dominates = cfg.GetDominates(1); + EXPECT_NE(std::find(dominates.begin(), dominates.end(), 1), dominates.end()); + EXPECT_NE(std::find(dominates.begin(), dominates.end(), 2), dominates.end()); + EXPECT_NE(std::find(dominates.begin(), dominates.end(), 3), dominates.end()); + EXPECT_NE(std::find(dominates.begin(), dominates.end(), 4), dominates.end()); + + dominates = cfg.GetDominates(2); + EXPECT_NE(std::find(dominates.begin(), dominates.end(), 2), dominates.end()); + + dominates = cfg.GetDominates(3); + EXPECT_NE(std::find(dominates.begin(), dominates.end(), 3), dominates.end()); + + dominates = cfg.GetDominates(4); + EXPECT_NE(std::find(dominates.begin(), dominates.end(), 4), dominates.end()); + + EXPECT_THAT(cfg.GetDominates(1).size(), 4); + EXPECT_THAT(cfg.GetDominates(2).size(), 1); + EXPECT_THAT(cfg.GetDominates(3).size(), 1); + 
EXPECT_THAT(cfg.GetDominates(4).size(), 1); + CHECK_EQ(cfg.GetCyclomaticComplexity(1), 2); } -TEST(FunctionComplexity, ComputeFuncComplexity) { - static const Coverage::CFTable g_cf_table1 = { +TEST(ControlFlow, ComputeFuncComplexity) { + static const CFTable g_cf_table1 = { 1, 2, 3, 0, 0, // 1 goes to 2 and 3. 2, 3, 4, 0, 0, // 2 goes to 3 and 4. 3, 1, 4, 0, 0, // 3 goes to 1 and 4. 4, 0, 0 // 4 goes nowhere. }; - static const Coverage::CFTable g_cf_table2 = { + static const CFTable g_cf_table2 = { 1, 0, 0, // 1 goes nowhere. }; - static const Coverage::CFTable g_cf_table3 = { + static const CFTable g_cf_table3 = { 1, 2, 0, 0, // 1 goes to 2. 2, 3, 0, 0, // 2 goes to 3. 3, 1, 0, 0, // 3 goes to 1. }; - static const Coverage::CFTable g_cf_table4 = { + static const CFTable g_cf_table4 = { 1, 2, 3, 0, 0, // 1 goes to 2 and 3. 2, 3, 4, 0, 0, // 2 goes to 3 and 4. 3, 0, 0, // 3 goes nowhere. 4, 0, 0 // 4 goes nowhere. }; - ControlFlowGraph cfg1; - cfg1.ReadFromCfTable(g_cf_table1, g_pc_table); + ControlFlowGraph cfg1(g_cf_table1, g_pc_table); EXPECT_NE(cfg1.size(), 0); - ControlFlowGraph cfg2; - cfg2.ReadFromCfTable(g_cf_table2, g_pc_table); + ControlFlowGraph cfg2(g_cf_table2, g_pc_table); EXPECT_NE(cfg2.size(), 0); - ControlFlowGraph cfg3; - cfg3.ReadFromCfTable(g_cf_table3, g_pc_table); + ControlFlowGraph cfg3(g_cf_table3, g_pc_table); EXPECT_NE(cfg3.size(), 0); - ControlFlowGraph cfg4; - cfg4.ReadFromCfTable(g_cf_table4, g_pc_table); + ControlFlowGraph cfg4(g_cf_table4, g_pc_table); EXPECT_NE(cfg4.size(), 0); EXPECT_EQ(ComputeFunctionCyclomaticComplexity(1, cfg1), 4); @@ -109,6 +146,110 @@ TEST(FunctionComplexity, ComputeFuncComplexity) { EXPECT_EQ(ComputeFunctionCyclomaticComplexity(1, cfg4), 2); } +// Returns a path for i-th temporary file. +static std::string GetTempFilePath(size_t i) { + return std::filesystem::path(GetTestTempDir()) + .append(absl::StrCat("coverage_test", i, "-", getpid())); +} + +// Returns path to test_fuzz_target. 
+static std::string GetTargetPath() { + return GetDataDependencyFilepath("testing/test_fuzz_target"); +} + +// Returns path to llvm-symbolizer. +static std::string GetLLVMSymbolizerPath() { + CHECK_EQ(system("which llvm-symbolizer"), EXIT_SUCCESS) + << "llvm-symbolizer has to be installed and findable via PATH"; + return "llvm-symbolizer"; +} + +// Tests GetCfTableFromBinary() on test_fuzz_target. +TEST(CFTable, GetCfTable) { + auto target_path = GetTargetPath(); + std::string tmp_path1 = GetTempFilePath(1); + std::string tmp_path2 = GetTempFilePath(2); + + // Load the cf table. + auto cf_table = GetCfTableFromBinary(target_path, tmp_path1); + LOG(INFO) << VV(target_path) << VV(tmp_path1) << VV(cf_table.size()); + if (cf_table.empty()) { + LOG(INFO) << "__sancov_cfs is empty."; + // TODO(navidem): This should be removed once OSS's clang supports + // control-flow. + GTEST_SKIP(); + } + + ASSERT_FALSE( + std::filesystem::exists(tmp_path1.c_str())); // tmp_path1 was deleted. + LOG(INFO) << VV(cf_table.size()); + + // Load the pc table. + auto pc_table = GetPcTableFromBinary(target_path, tmp_path1); + ASSERT_FALSE( + std::filesystem::exists(tmp_path1.c_str())); // tmp_path1 was deleted. + EXPECT_THAT(pc_table.empty(), false); + + // Symbolize pc_table. + SymbolTable symbols; + symbols.GetSymbolsFromBinary(pc_table, target_path, GetLLVMSymbolizerPath(), + tmp_path1, tmp_path2); + ASSERT_EQ(symbols.size(), pc_table.size()); + + absl::flat_hash_map pc_table_index; + for (size_t i = 0; i < pc_table.size(); i++) { + pc_table_index[pc_table[i].pc] = i; + } + + for (size_t j = 0; j < cf_table.size();) { + auto current_pc = cf_table[j]; + ++j; + size_t succ_num = 0; + size_t callee_num = 0; + size_t icallee_num = 0; + + // Iterate over successors. + while (cf_table[j]) { + ++succ_num; + ++j; + } + ++j; // Step over the delimiter. + + // Iterate over callees.
+ while (cf_table[j]) { + if (cf_table[j] > 0) ++callee_num; + if (cf_table[j] < 0) ++icallee_num; + ++j; + } + ++j; // Step over the delimiter. + + // Determine if current_pc is a function entry. + if (pc_table_index.contains(current_pc)) { + size_t index = pc_table_index[current_pc]; + if (pc_table[index].has_flag(PCInfo::kFuncEntry)) { + const std::string &current_function = symbols.func(index); + // Check for properties. + SCOPED_TRACE(testing::Message() + << "Checking for " << VV(current_function) + << VV(current_pc) << VV(cf_table[j]) << VV(j)); + if (current_function == "SingleEdgeFunc") { + EXPECT_EQ(succ_num, 0); + EXPECT_EQ(icallee_num, 0); + EXPECT_EQ(callee_num, 0); + } else if (current_function == "MultiEdgeFunc") { + EXPECT_EQ(succ_num, 2); + EXPECT_EQ(icallee_num, 0); + EXPECT_EQ(callee_num, 0); + } else if (current_function == "IndirectCallFunc") { + EXPECT_EQ(succ_num, 0); + EXPECT_EQ(icallee_num, 1); + EXPECT_EQ(callee_num, 0); + } + } + } + } +} + } // namespace } // namespace centipede diff --git a/corpus.cc b/corpus.cc index fedad47..a9a4484 100644 --- a/corpus.cc +++ b/corpus.cc @@ -31,7 +31,7 @@ namespace centipede { // TODO(kcc): [impl] add tests. -Coverage::PCIndexVec FeatureSet::ToCoveragePCs() const { +PCIndexVec FeatureSet::ToCoveragePCs() const { return {pc_index_set_.begin(), pc_index_set_.end()}; } diff --git a/corpus.h b/corpus.h index b1f039a..c7afca9 100644 --- a/corpus.h +++ b/corpus.h @@ -54,7 +54,7 @@ class FeatureSet { size_t size() const { return num_features_; } // Returns features that originate from CFG counters, converted to PCIndexVec. - Coverage::PCIndexVec ToCoveragePCs() const; + PCIndexVec ToCoveragePCs() const; // Returns the number of features in `this` from the given feature domain. size_t CountFeatures(feature_domains::Domain domain); @@ -101,7 +101,7 @@ class FeatureSet { size_t features_per_domain_[feature_domains::Domain::kLastDomain + 1] = {}; // Maintains the set of PC indices that correspond to added features.
- absl::flat_hash_set pc_index_set_; + absl::flat_hash_set pc_index_set_; }; // WeightedDistribution maintains an array of integer weights. @@ -231,7 +231,7 @@ class Corpus { // partially covered function. class CoverageFrontier { public: - CoverageFrontier(const Coverage::PCTable &pc_table) + CoverageFrontier(const PCTable &pc_table) : pc_table_(pc_table), frontier_(pc_table.size()) {} // Computes the coverage frontier of `corpus`. @@ -250,7 +250,7 @@ class CoverageFrontier { size_t MaxPcIndex() const { return pc_table_.size(); } private: - const Coverage::PCTable pc_table_; + const PCTable pc_table_; // frontier_[idx] is true iff pc_table_[i] is part of the coverage frontier. std::vector frontier_; diff --git a/corpus_test.cc b/corpus_test.cc index 9504b9f..060f66b 100644 --- a/corpus_test.cc +++ b/corpus_test.cc @@ -83,7 +83,7 @@ TEST(FeatureSet, CountUnseenAndPruneFrequentFeatures_IncrementFrequencies) { return feature_set.CountUnseenAndPruneFrequentFeatures(features); }; // Shorthand for IncrementFrequencies. - auto Increment = [&](const FeatureVec& features) { + auto Increment = [&](const FeatureVec &features) { feature_set.IncrementFrequencies(features); }; @@ -152,7 +152,7 @@ TEST(FeatureSet, CountUnseenAndPruneFrequentFeatures_IncrementFrequencies) { } TEST(Corpus, GetCmpArgs) { - Coverage::PCTable pc_table(100); + PCTable pc_table(100); CoverageFrontier coverage_frontier(pc_table); FeatureSet fs(3); Corpus corpus; @@ -165,7 +165,7 @@ TEST(Corpus, GetCmpArgs) { } TEST(Corpus, PrintStats) { - Coverage::PCTable pc_table(100); + PCTable pc_table(100); CoverageFrontier coverage_frontier(pc_table); FeatureSet fs(3); Corpus corpus; @@ -185,7 +185,7 @@ TEST(Corpus, PrintStats) { TEST(Corpus, Prune) { // Prune will remove an input if all of its features appear at least 3 times. 
- Coverage::PCTable pc_table(100); + PCTable pc_table(100); CoverageFrontier coverage_frontier(pc_table); FeatureSet fs(3); Corpus corpus; @@ -242,7 +242,7 @@ TEST(Corpus, Prune) { // Regression test for a crash in Corpus::Prune(). TEST(Corpus, PruneRegressionTest1) { - Coverage::PCTable pc_table(100); + PCTable pc_table(100); CoverageFrontier coverage_frontier(pc_table); FeatureSet fs(2); Corpus corpus; @@ -360,18 +360,18 @@ TEST(CoverageFrontier, Compute) { // Function [4, 6): Not covered. // Function [6, 9): Partially covered => part of frontier. // Function [9, 12): Fully covered. - Coverage::PCTable pc_table{{0, Coverage::PCInfo::kFuncEntry}, // Covered. - {1, Coverage::PCInfo::kFuncEntry}, - {2, Coverage::PCInfo::kFuncEntry}, // Covered. - {3, 0}, - {4, Coverage::PCInfo::kFuncEntry}, - {5, 0}, - {6, Coverage::PCInfo::kFuncEntry}, // Covered. - {7, 0}, // Covered. - {8, 0}, - {9, Coverage::PCInfo::kFuncEntry}, // Covered. - {10, 0}, // Covered. - {11, 0}}; // Covered. + PCTable pc_table{{0, PCInfo::kFuncEntry}, // Covered. + {1, PCInfo::kFuncEntry}, + {2, PCInfo::kFuncEntry}, // Covered. + {3, 0}, + {4, PCInfo::kFuncEntry}, + {5, 0}, + {6, PCInfo::kFuncEntry}, // Covered. + {7, 0}, // Covered. + {8, 0}, + {9, PCInfo::kFuncEntry}, // Covered. + {10, 0}, // Covered. + {11, 0}}; // Covered. 
CoverageFrontier frontier(pc_table); FeatureVec pcs(pc_table.size()); for (size_t i = 0; i < pc_table.size(); i++) { diff --git a/coverage.cc b/coverage.cc index e08cf39..70109f4 100644 --- a/coverage.cc +++ b/coverage.cc @@ -28,7 +28,6 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "absl/synchronization/mutex.h" -#include "./command.h" #include "./defs.h" #include "./logging.h" #include "./symbol_table.h" @@ -42,9 +41,6 @@ Coverage::Coverage(const PCTable &pc_table, const PCIndexVec &pci_vec) covered_pcs_vec_(pc_table.size()) { CHECK_LT(pc_table.size(), std::numeric_limits::max()); absl::flat_hash_set covered_pcs(pci_vec.begin(), pci_vec.end()); - for (Coverage::PCIndex i = 0; i < pc_table.size(); ++i) { - pc_index_map_[pc_table[i].pc] = i; - } // Iterate though all the pc_table entries. // The first one is some function's kFuncEntry. // Then find the next kFuncEntry or the table end. @@ -112,66 +108,8 @@ void Coverage::Print(const SymbolTable &symbols, std::ostream &out) { } } -Coverage::PCTable Coverage::GetPcTableFromBinary(std::string_view binary_path, - std::string_view tmp_path) { - Coverage::PCTable res = - GetPcTableFromBinaryWithPcTable(binary_path, tmp_path); - if (res.empty()) { - // Fall back to trace-pc. 
- res = GetPcTableFromBinaryWithTracePC(binary_path, tmp_path); - } - return res; -} - -Coverage::PCTable Coverage::GetPcTableFromBinaryWithPcTable( - std::string_view binary_path, std::string_view tmp_path) { - Command cmd(binary_path, {}, - {absl::StrCat("CENTIPEDE_RUNNER_FLAGS=:dump_pc_table:arg1=", - tmp_path, ":")}, - "/dev/null", "/dev/null"); - int system_exit_code = cmd.Execute(); - if (system_exit_code) { - LOG(INFO) << "system() for " << binary_path - << " with --dump_pc_table failed: " << VV(system_exit_code); - return {}; - } - ByteArray pc_infos_as_bytes; - ReadFromLocalFile(tmp_path, pc_infos_as_bytes); - std::filesystem::remove(tmp_path); - CHECK_EQ(pc_infos_as_bytes.size() % sizeof(PCInfo), 0); - size_t pc_table_size = pc_infos_as_bytes.size() / sizeof(PCInfo); - const auto *pc_infos = reinterpret_cast(pc_infos_as_bytes.data()); - PCTable pc_table{pc_infos, pc_infos + pc_table_size}; - CHECK_EQ(pc_table.size(), pc_table_size); - return pc_table; -} - -Coverage::CFTable Coverage::GetCfTableFromBinary(std::string_view binary_path, - std::string_view tmp_path) { - Command cmd(binary_path, {}, - {absl::StrCat("CENTIPEDE_RUNNER_FLAGS=:dump_cf_table:arg1=", - tmp_path, ":")}, - "/dev/null", "/dev/null"); - int cmd_exit_code = cmd.Execute(); - if (cmd_exit_code != EXIT_SUCCESS) { - LOG(ERROR) << "CF table dumping failed: " << VV(cmd.ToString()) - << VV(cmd_exit_code); - return {}; - } - ByteArray cf_infos_as_bytes; - ReadFromLocalFile(tmp_path, cf_infos_as_bytes); - std::filesystem::remove(tmp_path); - - size_t cf_table_size = cf_infos_as_bytes.size() / sizeof(intptr_t); - const auto *cf_infos = reinterpret_cast(cf_infos_as_bytes.data()); - CFTable cf_table{cf_infos, cf_infos + cf_table_size}; - CHECK_EQ(cf_table.size(), cf_table_size); - return cf_table; -} - //---------------------- NewCoverageLogger -std::string CoverageLogger::ObserveAndDescribeIfNew( - Coverage::PCIndex pc_index) { +std::string CoverageLogger::ObserveAndDescribeIfNew(PCIndex pc_index) 
{ if (pc_table_.empty()) return ""; // Fast-path return (symbolization is off). absl::MutexLock l(&mu_); if (!observed_indices_.insert(pc_index).second) return ""; @@ -179,55 +117,14 @@ std::string CoverageLogger::ObserveAndDescribeIfNew( if (pc_index >= pc_table_.size()) { os << "FUNC/EDGE index: " << pc_index; } else { - os << (pc_table_[pc_index].has_flag(Coverage::PCInfo::kFuncEntry) - ? "FUNC: " - : "EDGE: "); + os << (pc_table_[pc_index].has_flag(PCInfo::kFuncEntry) ? "FUNC: " + : "EDGE: "); os << symbols_.full_description(pc_index); if (!observed_descriptions_.insert(os.str()).second) return ""; } return os.str(); } -Coverage::PCTable Coverage::GetPcTableFromBinaryWithTracePC( - std::string_view binary_path, std::string_view tmp_path) { - // Assumes objdump in PATH. - // Run objdump -d on the binary. - Command cmd("objdump", {"-d", std::string(binary_path)}, {}, tmp_path, - "/dev/null"); - int system_exit_code = cmd.Execute(); - if (system_exit_code) { - LOG(INFO) << __func__ << " objdump failed: " << system_exit_code; - return PCTable(); - } - PCTable pc_table; - std::ifstream in(std::string{tmp_path}); - bool saw_new_function = false; - - // std::string::ends_with is not yet available. - auto ends_with = [](std::string_view str, std::string_view end) -> bool { - return end.size() <= str.size() && str.find(end) == str.size() - end.size(); - }; - - // Read the objdump output, find lines that start a function - // and lines that have a call to __sanitizer_cov_trace_pc. - // Reconstruct the PCTable from those. - for (std::string line; std::getline(in, line);) { - if (ends_with(line, ">:")) { // new function. - saw_new_function = true; - continue; - } - if (!ends_with(line, "<__sanitizer_cov_trace_pc>")) continue; - std::istringstream iss(line); - uintptr_t pc; - iss >> std::hex >> pc; - uintptr_t flags = saw_new_function ? PCInfo::kFuncEntry : 0; - saw_new_function = false; // next trace_pc will be in the same function. 
- pc_table.push_back({pc, flags}); - } - std::filesystem::remove(tmp_path); - return pc_table; -} - FunctionFilter::FunctionFilter(std::string_view functions_to_filter, const SymbolTable &symbols) { // set pcs_[idx] to 1, for any idx that belongs to a filtered function. diff --git a/coverage.h b/coverage.h index 19ec559..c4df9c7 100644 --- a/coverage.h +++ b/coverage.h @@ -25,10 +25,11 @@ #include #include "absl/base/thread_annotations.h" -#include "absl/container/flat_hash_set.h" #include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/synchronization/mutex.h" -#include "./util.h" +#include "./control_flow.h" +#include "./feature.h" #include "./logging.h" namespace centipede { @@ -41,73 +42,10 @@ class SymbolTable; // To avoid mutual inclusion with symbol_table.h. // Thread-compatible. class Coverage { public: - // PCInfo is a pair {PC, bit mask with PC flags}. - // See https://clang.llvm.org/docs/SanitizerCoverage.html#pc-table - struct PCInfo { - enum PCFlags : uintptr_t { - kFuncEntry = 1 << 0, // The PC is the function entry block. - }; - - uintptr_t pc; - uintptr_t flags; - - bool has_flag(PCFlags f) const { return flags & f; } - }; - - // Array of PCInfo-s. - // PCTable is created by the compiler/linker in the instrumented binary. - // The order of elements is significant: each element corresponds - // to the coverage counter with the same index. - // Every PCInfo that is kFuncEntry is followed by PCInfo-s from the same - // function. - using PCTable = std::vector; - - // Reads the pc table from the binary file at `binary_path`. - // May create a file `tmp_path`, but will delete it afterwards. 
- // Currently works for - // * binaries linked with :centipede_runner - // and built with -fsanitize-coverage=pc-table, - // * binaries built with -fsanitize-coverage=trace-pc - static PCTable GetPcTableFromBinary(std::string_view binary_path, - std::string_view tmp_path); - - // Helper for GetPcTableFromBinary, - // for binaries linked with :centipede_runner - // and built with -fsanitize-coverage=pc-table. - // Returns the PCTable that the binary itself reported. - // May create a file `tmp_path`, but will delete it afterwards. - static PCTable GetPcTableFromBinaryWithPcTable(std::string_view binary_path, - std::string_view tmp_path); - - // Helper for GetPcTableFromBinary, - // for binaries built with -fsanitize-coverage=trace-pc. - // Returns the PCTable reconstructed from `binary_path` with `objdump -d`. - // May create a file `tmp_path`, but will delete it afterwards. - static PCTable GetPcTableFromBinaryWithTracePC(std::string_view binary_path, - std::string_view tmp_path); - - // PCIndex: an index into the PCTable. - // We use 32-bit int for compactness since PCTable is never too large. - using PCIndex = uint32_t; - // A set of PCIndex-es, order is not important. - using PCIndexVec = std::vector; - // PCTable is a property of the binary. // PCIndexVec is the coverage obtained from specific execution(s). - Coverage(const PCTable &pc_table, const PCIndexVec &pci_vec); - - // Array of elements in __sancov_cfs section. - // CFTable is created by the compiler/linker in the instrumented binary. - // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-control-flow. - using CFTable = std::vector; - - // Reads the control-flow table from the binary file at `binary_path`. - // May create a file `tmp_path`, but will delete it afterwards. - // Currently works for - // * binaries linked with :fuzz_target_runner - // and built with -fsanitize-coverage=control-flow. 
- static CFTable GetCfTableFromBinary(std::string_view binary_path, - std::string_view tmp_path); + Coverage(const PCTable &pc_table, + const PCIndexVec &pci_vec); // Prints in human-readable form to `out` using `symbols`. void Print(const SymbolTable &symbols, std::ostream &out); @@ -122,21 +60,8 @@ class Coverage { bool BlockIsCovered(PCIndex pc_index) const { return covered_pcs_vec_[pc_index]; } - // Returns true if the given basic block is function entry. - bool BlockIsFunctionEntry(PCIndex pc_index) const { - return func_entries_[pc_index]; - } - // Returns the idx in pc_table associated with the PC, CHECK-fails if the PC - // is not in the pc_table. - PCIndex GetPcIndex(uintptr_t pc) const { - auto it = pc_index_map_.find(pc); - CHECK(it != pc_index_map_.end()); - return it->second; - } private: - // Map from PC to the idx in pc_table. - absl::flat_hash_map pc_index_map_; // A vector of size PCTable. func_entries[idx] is true iff means the PC at idx // is a function entry. std::vector func_entries_; @@ -155,7 +80,8 @@ class Coverage { // Partially covered function: function with some, but not all, edges covered. // Thus we can represent it as two vectors of PCIndex: covered and uncovered. struct PartiallyCoveredFunction { - PCIndexVec covered; // Non-empty, covered[0] is function entry. + PCIndexVec + covered; // Non-empty, covered[0] is function entry. PCIndexVec uncovered; // Non-empty. }; std::vector partially_covered_funcs; @@ -164,12 +90,13 @@ class Coverage { // Iterates `pc_table`, calls `callback` on every pair {beg, end}, such that // pc_table[beg] is PCInfo::kFuncEntry, and pc_table[beg + 1 : end] are not. 
template -void IteratePcTableFunctions(const Coverage::PCTable &pc_table, +void IteratePcTableFunctions(const PCTable &pc_table, Callback callback) { for (size_t beg = 0, n = pc_table.size(); beg < n;) { - if (pc_table[beg].has_flag(Coverage::PCInfo::kFuncEntry)) { + if (pc_table[beg].has_flag(PCInfo::kFuncEntry)) { size_t end = beg + 1; - while (end < n && !pc_table[end].has_flag(Coverage::PCInfo::kFuncEntry)) { + while (end < n && + !pc_table[end].has_flag(PCInfo::kFuncEntry)) { ++end; } callback(beg, end); @@ -184,21 +111,23 @@ class CoverageLogger { public: // CTOR. // Lifetimes of `pc_table` and `symbols` should be longer than for `this`. - CoverageLogger(const Coverage::PCTable &pc_table, const SymbolTable &symbols) + CoverageLogger(const PCTable &pc_table, + const SymbolTable &symbols) : pc_table_(pc_table), symbols_(symbols) {} // Checks if `pc_index` or its symbolized description was observed before. // If yes, returns empty string. // If this is the first observation, returns a symbolized description. // If symbolization is not available, returns a non-symbolized description. 
- std::string ObserveAndDescribeIfNew(Coverage::PCIndex pc_index); + std::string ObserveAndDescribeIfNew(PCIndex pc_index); private: - const Coverage::PCTable &pc_table_; + const PCTable &pc_table_; const SymbolTable &symbols_; absl::Mutex mu_; - absl::flat_hash_set observed_indices_ ABSL_GUARDED_BY(mu_); + absl::flat_hash_set observed_indices_ + ABSL_GUARDED_BY(mu_); absl::flat_hash_set observed_descriptions_ ABSL_GUARDED_BY(mu_); }; diff --git a/symbol_table.cc b/symbol_table.cc index 788f822..63096f6 100644 --- a/symbol_table.cc +++ b/symbol_table.cc @@ -20,11 +20,10 @@ #include #include -#include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/strip.h" #include "./command.h" -#include "./coverage.h" +#include "./control_flow.h" #include "./logging.h" #include "./util.h" @@ -49,7 +48,7 @@ void SymbolTable::ReadFromLLVMSymbolizer(std::istream &in) { } } -void SymbolTable::GetSymbolsFromBinary(const Coverage::PCTable &pc_table, +void SymbolTable::GetSymbolsFromBinary(const PCTable &pc_table, std::string_view binary_path, std::string_view symbolizer_path, std::string_view tmp_path1, diff --git a/symbol_table.h b/symbol_table.h index c79fe7a..7f4c20f 100644 --- a/symbol_table.h +++ b/symbol_table.h @@ -21,7 +21,7 @@ #include #include -#include "./coverage.h" +#include "./control_flow.h" namespace centipede { @@ -41,7 +41,7 @@ class SymbolTable { // pipes all PCs from pc_table though it, // and calls ReadFromLLVMSymbolizer() on the output. // Possibly uses files `tmp_path1` and `tmp_path2` for temporary storage. 
- void GetSymbolsFromBinary(const Coverage::PCTable &pc_table, + void GetSymbolsFromBinary(const PCTable &pc_table, std::string_view binary_path, std::string_view symbolizer_path, std::string_view tmp_path1, diff --git a/testing/BUILD b/testing/BUILD index 6b7af56..31d48f9 100644 --- a/testing/BUILD +++ b/testing/BUILD @@ -98,6 +98,7 @@ cc_test( ], deps = [ "@centipede//:centipede_interface", + "@centipede//:control_flow", "@centipede//:coverage", "@centipede//:defs", "@centipede//:environment", diff --git a/testing/coverage_test.cc b/testing/coverage_test.cc index 695368f..34c6d03 100644 --- a/testing/coverage_test.cc +++ b/testing/coverage_test.cc @@ -33,6 +33,7 @@ #include "absl/container/flat_hash_map.h" #include "absl/strings/str_cat.h" #include "./centipede_interface.h" +#include "./control_flow.h" #include "./defs.h" #include "./environment.h" #include "./execution_result.h" @@ -70,10 +71,10 @@ const char *symbolizer_output = "\n"; // PCTable that corresponds to symbolizer_output above. -static const Coverage::PCTable g_pc_table = { - {100, Coverage::PCInfo::kFuncEntry}, - {200, Coverage::PCInfo::kFuncEntry}, - {300, Coverage::PCInfo::kFuncEntry}, +static const PCTable g_pc_table = { + {100, PCInfo::kFuncEntry}, + {200, PCInfo::kFuncEntry}, + {300, PCInfo::kFuncEntry}, {400, 0}, {500, 0}, {600, 0}, @@ -126,7 +127,6 @@ TEST(Coverage, SymbolTable) { EXPECT_EQ(symbols.full_description(1), "? 
?"); } - TEST(Coverage, CoverageLoad) { Coverage cov(g_pc_table, {0, 2, 4, 5}); @@ -140,20 +140,6 @@ TEST(Coverage, CoverageLoad) { EXPECT_TRUE(cov.FunctionIsFullyCovered(0)); EXPECT_FALSE(cov.FunctionIsFullyCovered(1)); EXPECT_FALSE(cov.FunctionIsFullyCovered(2)); - - EXPECT_TRUE(cov.BlockIsFunctionEntry(0)); - EXPECT_TRUE(cov.BlockIsFunctionEntry(1)); - EXPECT_TRUE(cov.BlockIsFunctionEntry(2)); - EXPECT_FALSE(cov.BlockIsFunctionEntry(3)); - EXPECT_FALSE(cov.BlockIsFunctionEntry(4)); - EXPECT_FALSE(cov.BlockIsFunctionEntry(5)); - - EXPECT_EQ(cov.GetPcIndex(100), 0); - EXPECT_EQ(cov.GetPcIndex(200), 1); - EXPECT_EQ(cov.GetPcIndex(300), 2); - EXPECT_EQ(cov.GetPcIndex(400), 3); - EXPECT_EQ(cov.GetPcIndex(500), 4); - EXPECT_EQ(cov.GetPcIndex(600), 5); } TEST(Coverage, CoverageLogger) { @@ -178,7 +164,7 @@ TEST(Coverage, CoverageLogger) { CoverageLogger concurrently_used_logger(g_pc_table, symbols); auto cb = [&]() { for (int i = 0; i < 1000; i++) { - Coverage::PCIndex pc_index = i % g_pc_table.size(); + PCIndex pc_index = i % g_pc_table.size(); logger.ObserveAndDescribeIfNew(pc_index); } }; @@ -210,92 +196,6 @@ static std::string GetLLVMSymbolizerPath() { return "llvm-symbolizer"; } -// Tests GetCfTableFromBinary() on test_fuzz_target. -TEST(Coverage, GetCfTable) { - auto target_path = GetTargetPath(); - std::string tmp_path1 = GetTempFilePath(1); - std::string tmp_path2 = GetTempFilePath(2); - - // Load the cf table. - auto cf_table = Coverage::GetCfTableFromBinary(target_path, tmp_path1); - if (cf_table.empty()) { - LOG(INFO) << "__sancov_cfs is empty."; - // TODO(navidem): This should be removed once OSS's clang supports - // control-flow. - GTEST_SKIP(); - } - - ASSERT_EQ(std::filesystem::exists(tmp_path1.c_str()), - false); // tmp_path1 was deleted. - LOG(INFO) << VV(cf_table.size()); - - // Load the pc table. - auto pc_table = Coverage::GetPcTableFromBinary(target_path, tmp_path1); - EXPECT_EQ(fopen(tmp_path1.c_str(), "r"), nullptr); // tmp_path1 was deleted. 
- LOG(INFO) << VV(pc_table.size()); - EXPECT_THAT(pc_table.empty(), false); - - // Symbilize pc_table. - SymbolTable symbols; - symbols.GetSymbolsFromBinary(pc_table, target_path, GetLLVMSymbolizerPath(), - tmp_path1, tmp_path2); - EXPECT_EQ(symbols.size(), pc_table.size()); - - absl::flat_hash_map pc_table_index; - for (size_t i = 0; i < pc_table.size(); i++) { - pc_table_index[pc_table[i].pc] = i; - } - - for (size_t j = 0; j < cf_table.size();) { - auto current_pc = cf_table[j]; - ++j; - size_t succ_num = 0; - size_t callee_num = 0; - size_t icallee_num = 0; - - // Iterate over successors. - while (cf_table[j]) { - ++succ_num; - ++j; - } - ++j; // Step over the delimeter. - - // Iterate over callees. - while (cf_table[j]) { - if (cf_table[j] > 0) ++callee_num; - if (cf_table[j] < 0) ++icallee_num; - ++j; - } - ++j; // Step over the delimeter. - - // Determine if current_pc is a function entry. - if (pc_table_index.contains(current_pc)) { - size_t index = pc_table_index[current_pc]; - if (pc_table[index].has_flag(Coverage::PCInfo::kFuncEntry)) { - std::string_view current_function = symbols.func(index); - // Check for properties. - LOG(INFO) << "Checking for " << VV(current_function); - SCOPED_TRACE(testing::Message() - << "Checking for " << VV(current_function) - << VV(current_pc) << VV(cf_table[j]) << VV(j)); - if (current_function == "SingleEdgeFunc") { - EXPECT_EQ(succ_num, 0); - EXPECT_EQ(icallee_num, 0); - EXPECT_EQ(callee_num, 0); - } else if (current_function == "MultiEdgeFunc") { - EXPECT_EQ(succ_num, 2); - EXPECT_EQ(icallee_num, 0); - EXPECT_EQ(callee_num, 0); - } else if (current_function == "IndirectCallFunc") { - EXPECT_EQ(succ_num, 0); - EXPECT_EQ(icallee_num, 1); - EXPECT_EQ(callee_num, 0); - } - } - } - } -} - // A simple CentipedeCallbacks derivative for this test. 
class TestCallbacks : public CentipedeCallbacks { public: @@ -345,8 +245,7 @@ TEST(Coverage, CoverageFeatures) { EXPECT_EQ(features.size(), 2); EXPECT_NE(features[0], features[1]); // Get pc_table and symbols. - auto pc_table = - Coverage::GetPcTableFromBinary(GetTargetPath(), GetTempFilePath(0)); + auto pc_table = GetPcTableFromBinary(GetTargetPath(), GetTempFilePath(0)); SymbolTable symbols; symbols.GetSymbolsFromBinary(pc_table, GetTargetPath(), GetLLVMSymbolizerPath(), GetTempFilePath(0), @@ -459,8 +358,7 @@ TEST(Coverage, PathFeatures) { TEST(Coverage, FunctionFilter) { // initialize coverage data. - Coverage::PCTable pc_table = - Coverage::GetPcTableFromBinary(GetTargetPath(), GetTempFilePath(0)); + PCTable pc_table = GetPcTableFromBinary(GetTargetPath(), GetTempFilePath(0)); SymbolTable symbols; symbols.GetSymbolsFromBinary(pc_table, GetTargetPath(), GetLLVMSymbolizerPath(), GetTempFilePath(0),