From b537640fbb3b353f9ddebe7181dc99c4fbbc2207 Mon Sep 17 00:00:00 2001 From: Hugh Wang Date: Wed, 16 Dec 2020 16:30:41 +0800 Subject: [PATCH] Hot-path acceleration: one-pass scan and vectorization --- Makefile | 2 +- afl-fuzz.c | 30 ++++++-------- coverage-32.h | 47 +++++++++++++++++++++ coverage-64.h | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 173 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 381931289..8de81f3a8 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ MISC_PATH = $(PREFIX)/share/afl PROGS = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze SH_PROGS = afl-plot afl-cmin afl-whatsup -CFLAGS ?= -O3 -funroll-loops +CFLAGS ?= -O3 -march=native CFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \ -DAFL_PATH=\"$(HELPER_PATH)\" -DDOC_PATH=\"$(DOC_PATH)\" \ -DBIN_PATH=\"$(BIN_PATH)\" diff --git a/afl-fuzz.c b/afl-fuzz.c index 844e01a06..af375bec8 100644 --- a/afl-fuzz.c +++ b/afl-fuzz.c @@ -1085,22 +1085,6 @@ static inline u8 has_new_bits(u8* virgin_map) { } -/* A combination of classify_counts and has_new_bits. If 0 is returned, then the - * trace bits are kept as-is. Otherwise, the trace bits are overwritten with - * classified values. - * - * This accelerates the processing: in most cases, no interesting behavior - * happen, and the trace bits will be discarded soon. This function optimizes - * for such cases: one-pass scan on trace bits without modifying anything. Only - * on rare cases it fall backs to the slow path: classify_counts() first, then - * return has_new_bits(). */ - -static inline u8 has_new_bits_unclassified(u8* virgin_map) { - classify_counts(trace_bits); // TODO - return has_new_bits(virgin_map); -} - - /* Get rid of shared memory (atexit handler). */ static void remove_shm(void) { @@ -3044,7 +3028,19 @@ static u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) { /* Keep only if there are new bits in the map, add to queue for future fuzzing, etc. 
*/ - if (!(hnb = has_new_bits_unclassified(virgin_bits))) { + + /* A combination of classify_counts and has_new_bits. If 0 is returned, then + * the trace bits are kept as-is. Otherwise, the trace bits are overwritten + * with classified values. + * + * This accelerates the processing: in most cases, no interesting behaviors + * happen, and the trace bits will be discarded soon. This function + * optimizes for such cases: one-pass scan on trace bits without modifying + * anything. Only on rare cases it falls back to the slow path: + * classify_counts() first, then return has_new_bits(). */ + hnb = has_new_bits_unclassified(virgin_bits); + + if (!hnb) { if (crash_mode) total_crashes++; return 0; } diff --git a/coverage-32.h b/coverage-32.h index bead63017..7bc5f2173 100644 --- a/coverage-32.h +++ b/coverage-32.h @@ -84,3 +84,50 @@ static inline void discover_word(u8* ret, u32* current, u32* virgin) { *virgin &= ~*current; } } + + +#define PACK_SIZE 16 +static inline const u32* skim(const u32* virgin, const u32* current, const u32* current_end) { + + for (; current != current_end; virgin += 4, current += 4) { + + if (current[0] && classify_word(current[0]) & virgin[0]) return &current[0]; + if (current[1] && classify_word(current[1]) & virgin[1]) return &current[1]; + if (current[2] && classify_word(current[2]) & virgin[2]) return &current[2]; + if (current[3] && classify_word(current[3]) & virgin[3]) return &current[3]; + + } + + return current_end; +} + + +static inline u8 has_new_bits_unclassified(u8* virgin_map) { + u32* virgin = (u32*)virgin_map; + u32* current = (u32*)trace_bits; + u32* current_end = (u32*)(trace_bits + MAP_SIZE); + + u8 ret = 0; + while ((current = (u32*)skim(virgin, current, current_end)) != current_end) { + /* Compute the word offset inside current pack. 
*/ + u32 offset = ((uintptr_t)current & (PACK_SIZE - 1)) / 4; + virgin = (u32*)((u8*)current - trace_bits + virgin_map); + +#define UNROLL(x) \ + case x: \ + if (*current) { \ + *current = classify_word(*current); \ + discover_word(&ret, current, virgin); \ + } \ + ++current, ++virgin; + + /* Ensure the alignment of the next iteration. */ + switch (offset) { + UNROLL(0) UNROLL(1) UNROLL(2) UNROLL(3) + } + +#undef UNROLL + } + + return ret; +} diff --git a/coverage-64.h b/coverage-64.h index ba4a37d8a..02b600beb 100644 --- a/coverage-64.h +++ b/coverage-64.h @@ -1,6 +1,9 @@ #include "config.h" #include "types.h" +#if (defined(__AVX512F__) && defined(__AVX512DQ__)) || defined(__AVX2__) +# include <immintrin.h> +#endif static inline u64 classify_word(u64 word) { @@ -94,3 +97,112 @@ static inline void discover_word(u8* ret, u64* current, u64* virgin) { } } + + +#if defined(__AVX512F__) && defined(__AVX512DQ__) +#define PACK_SIZE 64 +static inline const u64* skim(const u64* virgin, const u64* current, const u64* current_end) { + + for (; current != current_end; virgin += 8, current += 8) { + + __m512i value = *(__m512i*)current; + __mmask8 mask = _mm512_testn_epi64_mask(value, value); + + /* All bytes are zero. */ + if (mask == 0xff) continue; + + /* Look for nonzero bytes and check for new bits. */ +#define UNROLL(x) \ + if (!(mask & (1 << x)) && classify_word(current[x]) & virgin[x]) return &current[x] + UNROLL(0); UNROLL(1); UNROLL(2); UNROLL(3); + UNROLL(4); UNROLL(5); UNROLL(6); UNROLL(7); +#undef UNROLL + + } + + return current_end; + +} +#endif + + +#if !defined(PACK_SIZE) && defined(__AVX2__) +#define PACK_SIZE 32 +static inline const u64* skim(const u64* virgin, const u64* current, const u64* current_end) { + + __m256i zeroes = _mm256_setzero_si256(); + + for (; current != current_end; virgin += 4, current += 4) { + + __m256i value = *(__m256i*)current; + __m256i cmp = _mm256_cmpeq_epi64(value, zeroes); + u32 mask = _mm256_movemask_epi8(cmp); + + /* All bytes are zero. 
*/ + if (mask == -1) continue; + + /* Look for nonzero bytes and check for new bits. */ + if (!(mask & 0xff) && classify_word(current[0]) & virgin[0]) return &current[0]; + if (!(mask & 0xff00) && classify_word(current[1]) & virgin[1]) return &current[1]; + if (!(mask & 0xff0000) && classify_word(current[2]) & virgin[2]) return &current[2]; + if (!(mask & 0xff000000) && classify_word(current[3]) & virgin[3]) return &current[3]; + + } + + return current_end; + +} +#endif + + +#if !defined(PACK_SIZE) +#define PACK_SIZE 32 +static inline const u64* skim(const u64* virgin, const u64* current, const u64* current_end) { + + for (; current != current_end; virgin += 4, current += 4) { + + if (current[0] && classify_word(current[0]) & virgin[0]) return &current[0]; + if (current[1] && classify_word(current[1]) & virgin[1]) return &current[1]; + if (current[2] && classify_word(current[2]) & virgin[2]) return &current[2]; + if (current[3] && classify_word(current[3]) & virgin[3]) return &current[3]; + + } + + return current_end; + +} +#endif + + +static inline u8 has_new_bits_unclassified(u8* virgin_map) { + u64* virgin = (u64*)virgin_map; + u64* current = (u64*)trace_bits; + u64* current_end = (u64*)(trace_bits + MAP_SIZE); + + u8 ret = 0; + while ((current = (u64*)skim(virgin, current, current_end)) != current_end) { + /* Compute the word offset inside current pack. */ + u64 offset = ((uintptr_t)current & (PACK_SIZE - 1)) / 8; + virgin = (u64*)((u8*)current - trace_bits + virgin_map); + +#define UNROLL(x) \ + case x: \ + if (*current) { \ + *current = classify_word(*current); \ + discover_word(&ret, current, virgin); \ + } \ + ++current, ++virgin; + + /* Ensure the alignment of the next iteration. */ + switch (offset) { + UNROLL(0) UNROLL(1) UNROLL(2) UNROLL(3) +#if PACK_SIZE == 64 + UNROLL(4) UNROLL(5) UNROLL(6) UNROLL(7) +#endif + } + +#undef UNROLL + } + + return ret; +}