Skip to content
This repository has been archived by the owner on Oct 11, 2023. It is now read-only.

Commit

Permalink
add an experimental flag to treat pc pairs as features.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 458108404
  • Loading branch information
kcc authored and copybara-github committed Jun 30, 2022
1 parent 04d8d8c commit 5564535
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 0 deletions.
44 changes: 44 additions & 0 deletions centipede.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ void Centipede::Log(std::string_view log_type, size_t min_log_level) {
<< " df: " << fs_.CountFeatures(FeatureDomains::kDataFlow)
<< " cmp: " << fs_.CountFeatures(FeatureDomains::kCMP)
<< " path: " << fs_.CountFeatures(FeatureDomains::kBoundedPath)
<< " pair: " << fs_.CountFeatures(FeatureDomains::kPCPair)
<< " corp: " << corpus_.NumActive() << "/" << corpus_.NumTotal()
<< " max/avg " << max << " " << avg << " exec/s: " << exec_speed
<< " mb: " << (MemoryUsage() >> 20);
Expand Down Expand Up @@ -238,6 +239,47 @@ bool Centipede::ExecuteAndReportCrash(std::string_view binary,
return success;
}

// *** Highly experimental and risky. May not scale well for large targets. ***
//
// The idea: an unordered pair of two features {a, b} is by itself a feature.
// In the worst case, the number of such synthetic features is a square of
// the number of regular features, which may not scale.
// For now, we only treat pairs of PCs as features, which is still quadratic
// by the number of PCs. But in moderate-sized programs this may be tolerable.
//
// Rationale: if two different parts of the target are exercised simultaneously,
// this may create interesting behaviour that is hard to capture with regular
// control flow (or other) features.
size_t Centipede::AddPcPairFeatures(FeatureVec &fv) {
// Using a scratch vector to avoid allocations.
auto &pcs = add_pc_pair_scratch_;
pcs.clear();

size_t num_pcs = pc_table_.size();
size_t num_added_pairs = 0;

// Collect PCs from fv.
for (auto feature : fv) {
if (FeatureDomains::k8bitCounters.Contains(feature))
pcs.push_back(Convert8bitCounterFeatureToPcIndex(feature));
}

// The quadratic loop: iterate all PC pairs (!!).
for (size_t i = 0, n = pcs.size(); i < n; ++i) {
size_t pc1 = pcs[i];
for (size_t j = i + 1; j < n; ++j) {
size_t pc2 = pcs[j];
feature_t f = FeatureDomains::kPCPair.ConvertToMe(
ConvertPcPairToNumber(pc1, pc2, num_pcs));
// If we have seen this pair at least once, ignore it.
if (fs_.Frequency(f)) continue;
fv.push_back(f);
++num_added_pairs;
}
}
return num_added_pairs;
}

bool Centipede::RunBatch(const std::vector<ByteArray> &input_vec,
BatchResult &batch_result,
BlobFileAppender *corpus_file,
Expand Down Expand Up @@ -265,6 +307,8 @@ bool Centipede::RunBatch(const std::vector<ByteArray> &input_vec,
bool function_filter_passed = function_filter_.filter(fv);
bool input_gained_new_coverage =
fs_.CountUnseenAndPruneFrequentFeatures(fv);
if (env_.use_pcpair_features && AddPcPairFeatures(fv))
input_gained_new_coverage = true;
if (unconditional_features_file) {
CHECK_OK(unconditional_features_file->Append(
PackFeaturesAndHash(input_vec[i], fv)));
Expand Down
8 changes: 8 additions & 0 deletions centipede.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ class Centipede {
void MergeFromOtherCorpus(std::string_view merge_from_dir,
size_t shard_index_to_merge);

// Collects all PCs from `fv`, then adds PC-pair features to `fv`.
// Returns the number of added features.
// See more comments in centipede.cc.
size_t AddPcPairFeatures(FeatureVec &fv);

FeatureSet fs_;
Timer timer_; // counts time for coverage collection rate computation
Corpus corpus_;
Expand All @@ -145,6 +150,9 @@ class Centipede {
// Counts the number of crashes reported so far.
int num_crash_reports_ = 0;

// Scratch object for AddPcPairFeatures.
std::vector<size_t> add_pc_pair_scratch_;

// Path and command for the input_filter.
std::string input_filter_path_;
Command input_filter_cmd_;
Expand Down
4 changes: 4 additions & 0 deletions environment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ ABSL_FLAG(bool, use_counter_features, false,
"When available from instrumentation, use features derived from "
"counting the number of occurrences of a given PC. "
"When enabled, supersedes --use_pc_features.");
ABSL_FLAG(bool, use_pcpair_features, false,
"If true, PC pairs are used as additional synthetic features. "
"Experimental, use with care - it may explode the corpus.");
ABSL_FLAG(bool, generate_corpus_stats, false,
"If true, a file workdir/corpus-stats-BINARY.json containing"
"corpus stats will be generated periodically");
Expand Down Expand Up @@ -226,6 +229,7 @@ Environment::Environment(int argc, char** argv)
use_cmp_features(absl::GetFlag(FLAGS_use_cmp_features)),
use_dataflow_features(absl::GetFlag(FLAGS_use_dataflow_features)),
use_counter_features(absl::GetFlag(FLAGS_use_counter_features)),
use_pcpair_features(absl::GetFlag(FLAGS_use_pcpair_features)),
generate_corpus_stats(absl::GetFlag(FLAGS_generate_corpus_stats)),
distill_shards(absl::GetFlag(FLAGS_distill_shards)),
fork_server_helper_path(absl::GetFlag(FLAGS_fork_server_helper_path)),
Expand Down
1 change: 1 addition & 0 deletions environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ struct Environment {
bool use_cmp_features;
bool use_dataflow_features;
bool use_counter_features;
size_t use_pcpair_features;
bool generate_corpus_stats;
size_t distill_shards;
std::string fork_server_helper_path;
Expand Down
3 changes: 3 additions & 0 deletions feature.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ constexpr Domain kCMP = {kDataFlow.end(), 1UL << 40, "cmp"};
// their number to 2^32.
constexpr Domain kBoundedPath = {kCMP.end(), 1UL << 32, "path"};

// Features derived from (unordered) pairs of PCs.
constexpr Domain kPCPair = {kBoundedPath.end(), 1UL << 40, "pc-pair"};

} // namespace FeatureDomains

// Converts a 8-bit coverage counter, i.e. a pair of
Expand Down

0 comments on commit 5564535

Please sign in to comment.