Skip to content
This repository has been archived by the owner on Mar 22, 2024. It is now read-only.

Commit

Permalink
Hot-path acceleration: one-pass scan and vectorization
Browse files Browse the repository at this point in the history
  • Loading branch information
hghwng committed Dec 16, 2020
1 parent 63eb62a commit b537640
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 18 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ MISC_PATH = $(PREFIX)/share/afl
PROGS = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
SH_PROGS = afl-plot afl-cmin afl-whatsup

CFLAGS ?= -O3 -funroll-loops
CFLAGS ?= -O3 -march=native
CFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \
-DAFL_PATH=\"$(HELPER_PATH)\" -DDOC_PATH=\"$(DOC_PATH)\" \
-DBIN_PATH=\"$(BIN_PATH)\"
Expand Down
30 changes: 13 additions & 17 deletions afl-fuzz.c
Original file line number Diff line number Diff line change
Expand Up @@ -1085,22 +1085,6 @@ static inline u8 has_new_bits(u8* virgin_map) {
}


/* A combination of classify_counts and has_new_bits. If 0 is returned, then the
 * trace bits are kept as-is. Otherwise, the trace bits are overwritten with
 * classified values.
 *
 * This accelerates the processing: in most cases, no interesting behavior
 * happens, and the trace bits will be discarded soon. This function optimizes
 * for such cases: a one-pass scan of the trace bits without modifying
 * anything. Only in rare cases does it fall back to the slow path:
 * classify_counts() first, then return has_new_bits(). */

static inline u8 has_new_bits_unclassified(u8* virgin_map) {
  /* Placeholder implementation: always takes the slow path (classify the
     whole map in place, then scan it). TODO: replace with the one-pass
     skim() fast path described above. */
  classify_counts(trace_bits);
  return has_new_bits(virgin_map);
}


/* Get rid of shared memory (atexit handler). */

static void remove_shm(void) {
Expand Down Expand Up @@ -3044,7 +3028,19 @@ static u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) {
/* Keep only if there are new bits in the map, add to queue for
future fuzzing, etc. */

if (!(hnb = has_new_bits_unclassified(virgin_bits))) {

/* A combination of classify_counts and has_new_bits. If 0 is returned, then
* the trace bits are kept as-is. Otherwise, the trace bits are overwritten
* with classified values.
*
* This accelerates the processing: in most cases, no interesting behavior
* happens, and the trace bits will be discarded soon. This function
* optimizes for such cases: a one-pass scan of the trace bits without
* modifying anything. Only in rare cases does it fall back to the slow
* path: classify_counts() first, then return has_new_bits(). */
hnb = has_new_bits_unclassified(virgin_bits);

if (!hnb) {
if (crash_mode) total_crashes++;
return 0;
}
Expand Down
47 changes: 47 additions & 0 deletions coverage-32.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,50 @@ static inline void discover_word(u8* ret, u32* current, u32* virgin) {
*virgin &= ~*current;
}
}


#define PACK_SIZE 16

/* One-pass read-only scan of the trace map. Walks the map one 16-byte pack
 * (four u32 words) at a time and returns a pointer to the first word whose
 * classified value intersects the virgin map; returns current_end when no
 * such word exists. Neither map is modified. */
static inline const u32* skim(const u32* virgin, const u32* current, const u32* current_end) {

  for (; current != current_end; virgin += 4, current += 4) {

    /* Check each word of the pack in order; skip zero words cheaply. */
    for (u32 i = 0; i < 4; i++)
      if (current[i] && (classify_word(current[i]) & virgin[i])) return &current[i];

  }

  return current_end;
}


/* Combination of classify_counts() and has_new_bits() optimized for the
 * common "nothing new" case. skim() scans the trace read-only; only when it
 * finds a word whose classified value intersects the virgin map do we
 * classify (in place) the remainder of that word's 16-byte pack and record
 * discoveries via discover_word(). Returns 0 if nothing new was seen; in
 * that case trace_bits is untouched. Otherwise the touched packs hold
 * classified values and ret reflects the kind of novelty found.
 * NOTE(review): assumes MAP_SIZE is a multiple of PACK_SIZE and trace_bits
 * is PACK_SIZE-aligned -- confirm against config.h / shm setup. */
static inline u8 has_new_bits_unclassified(u8* virgin_map) {
  u32* virgin = (u32*)virgin_map;
  u32* current = (u32*)trace_bits;
  u32* current_end = (u32*)(trace_bits + MAP_SIZE);

  u8 ret = 0;
  while ((current = (u32*)skim(virgin, current, current_end)) != current_end) {
    /* Compute the word offset inside current pack. */
    u32 offset = ((uintptr_t)current & (PACK_SIZE - 1)) / 4;
    /* Re-derive the matching virgin pointer from the trace offset. */
    virgin = (u32*)((u8*)current - trace_bits + virgin_map);

/* Classify one word in place and fold any new bits into ret, then advance.
   Deliberately no break: the switch falls through from `offset` to the end
   of the pack. */
#define UNROLL(x) \
  case x: \
    if (*current) { \
      *current = classify_word(*current); \
      discover_word(&ret, current, virgin); \
    } \
    ++current, ++virgin;

    /* Ensure the alignment of the next iteration. */
    switch (offset) {
      UNROLL(0) UNROLL(1) UNROLL(2) UNROLL(3)
    }

#undef UNROLL
  }

  return ret;
}
112 changes: 112 additions & 0 deletions coverage-64.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#include "config.h"
#include "types.h"

#if (defined(__AVX512F__) && defined(__AVX512DQ__)) || defined(__AVX2__)
# include <immintrin.h>
#endif

static inline u64 classify_word(u64 word) {

Expand Down Expand Up @@ -94,3 +97,112 @@ static inline void discover_word(u8* ret, u64* current, u64* virgin) {
}

}


#if defined(__AVX512F__) && defined(__AVX512DQ__)
#define PACK_SIZE 64

/* AVX-512 one-pass read-only scan: processes one 64-byte pack (eight u64
 * words) per iteration. Returns a pointer to the first word whose
 * classified value intersects the virgin map, or current_end if none.
 * Neither map is modified. */
static inline const u64* skim(const u64* virgin, const u64* current, const u64* current_end) {

  for (; current != current_end; virgin += 8, current += 8) {

    __m512i pack = *(__m512i*)current;
    /* Bit i of zero_lanes is set iff 64-bit lane i is all zero. */
    __mmask8 zero_lanes = _mm512_testn_epi64_mask(pack, pack);

    /* Whole pack is zero -- nothing to inspect. */
    if (zero_lanes == 0xff) continue;

    /* Inspect nonzero lanes in order; first intersection wins. */
    for (u32 i = 0; i < 8; i++)
      if (!(zero_lanes & (1u << i)) && (classify_word(current[i]) & virgin[i]))
        return &current[i];

  }

  return current_end;

}
#endif


#if !defined(PACK_SIZE) && defined(__AVX2__)
#define PACK_SIZE 32

/* AVX2 one-pass read-only scan: processes one 32-byte pack (four u64 words)
 * per iteration. Returns a pointer to the first word whose classified value
 * intersects the virgin map, or current_end if none. Neither map is
 * modified. */
static inline const u64* skim(const u64* virgin, const u64* current, const u64* current_end) {

  __m256i all_zero = _mm256_setzero_si256();

  for (; current != current_end; virgin += 4, current += 4) {

    __m256i pack = *(__m256i*)current;
    /* Byte j of the movemask is set iff byte j of the pack sits in an
       all-zero 64-bit lane; lane i owns bits i*8 .. i*8+7. */
    u32 zero_bytes = (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi64(pack, all_zero));

    /* Whole pack is zero -- nothing to inspect. */
    if (zero_bytes == 0xffffffffU) continue;

    /* Inspect nonzero lanes in order; first intersection wins. */
    for (u32 i = 0; i < 4; i++)
      if (!(zero_bytes & (0xffU << (i * 8))) && (classify_word(current[i]) & virgin[i]))
        return &current[i];

  }

  return current_end;

}
#endif


#if !defined(PACK_SIZE)
#define PACK_SIZE 32
static inline const u64* skim(const u64* virgin, const u64* current, const u64* current_end) {

for (; current != current_end; virgin += 4, current += 4) {

if (current[0] && classify_word(current[0]) & virgin[0]) return &current[0];
if (current[1] && classify_word(current[1]) & virgin[1]) return &current[1];
if (current[2] && classify_word(current[2]) & virgin[2]) return &current[2];
if (current[3] && classify_word(current[3]) & virgin[3]) return &current[3];

}

return current_end;

}
#endif


/* Combination of classify_counts() and has_new_bits() optimized for the
 * common "nothing new" case. skim() scans the trace read-only; only when it
 * finds a word whose classified value intersects the virgin map do we
 * classify (in place) the remainder of that word's pack and record
 * discoveries via discover_word(). Returns 0 if nothing new was seen; in
 * that case trace_bits is untouched. Otherwise the touched packs hold
 * classified values and ret reflects the kind of novelty found.
 * NOTE(review): assumes MAP_SIZE is a multiple of PACK_SIZE and trace_bits
 * is PACK_SIZE-aligned -- confirm against config.h / shm setup. */
static inline u8 has_new_bits_unclassified(u8* virgin_map) {
  u64* virgin = (u64*)virgin_map;
  u64* current = (u64*)trace_bits;
  u64* current_end = (u64*)(trace_bits + MAP_SIZE);

  u8 ret = 0;
  while ((current = (u64*)skim(virgin, current, current_end)) != current_end) {
    /* Compute the word offset inside current pack. */
    u64 offset = ((uintptr_t)current & (PACK_SIZE - 1)) / 8;
    /* Re-derive the matching virgin pointer from the trace offset. */
    virgin = (u64*)((u8*)current - trace_bits + virgin_map);

/* Classify one word in place and fold any new bits into ret, then advance.
   Deliberately no break: the switch falls through from `offset` to the end
   of the pack. */
#define UNROLL(x) \
  case x: \
    if (*current) { \
      *current = classify_word(*current); \
      discover_word(&ret, current, virgin); \
    } \
    ++current, ++virgin;

    /* Ensure the alignment of the next iteration. */
    switch (offset) {
      UNROLL(0) UNROLL(1) UNROLL(2) UNROLL(3)
#if PACK_SIZE == 64
      UNROLL(4) UNROLL(5) UNROLL(6) UNROLL(7)
#endif
    }

#undef UNROLL
  }

  return ret;
}

0 comments on commit b537640

Please sign in to comment.