You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
For my work on learning symbolic inputs, I wrote a variant of delta debugging which reduces parse trees based on an existing grammar. It works quite well and reasonably fast so far. I do not know whether it is relevant for the Debugging Book, and the code is quite lengthy and not didactically optimized, but thought I'd share it here anyway.
The code implementing the reduction follows below; after it, I'll also post an example that processes CSV.
import itertools
import logging
import re

from fuzzingbook.GrammarFuzzer import tree_to_string
from fuzzingbook.Grammars import RE_NONTERMINAL, is_nonterminal
from fuzzingbook.Parser import EarleyParser


class Reducer:
    """Grammar-based input reduction in the spirit of Delta Debugging.

    The input is parsed into a tree of ``(symbol, children)`` pairs and then
    shrunk, subtree by subtree, as long as the supplied ``test`` keeps
    raising the expected exception type.

    Throughout the class a *path* is a list of child indices leading from
    the root of a tree to one of its subtrees; the empty path denotes the
    tree itself.
    """

    def __init__(self, grammar,
                 concrete_input,
                 test,
                 expected_exc_type=Exception,
                 max_tries_for_generalization=50,
                 max_tries_for_validation=200,
                 max_retries_after_failed_validation=3):
        """Store the reduction configuration.

        :param grammar: mapping from nonterminal symbols to their expansions.
        :param concrete_input: the failing input string to reduce.
        :param test: callable invoked with a candidate input string; it
            signals a failure by raising an exception.
        :param expected_exc_type: exception type (or tuple of types) that
            counts as "the failure is still reproduced".
        :param max_tries_for_generalization: stored on the instance; not
            read by any method of this class.
        :param max_tries_for_validation: stored on the instance; raised to
            ``2 * max_tries_for_generalization`` if it is smaller than
            ``max_tries_for_generalization``.
        :param max_retries_after_failed_validation: stored on the instance;
            not read by any method of this class.
        """
        self.grammar = grammar
        self.test = test
        self.max_tries_for_generalization = max_tries_for_generalization
        self.max_tries_for_validation = max_tries_for_validation
        self.max_retries_after_failed_validation = max_retries_after_failed_validation
        self.expected_exc_type = expected_exc_type
        self.logger = logging.getLogger("Reducer")
        self.concrete_input = concrete_input

        if max_tries_for_validation < max_tries_for_generalization:
            self.max_tries_for_validation = 2 * max_tries_for_generalization

    def reduce(self, concrete_input=None):
        """Implements the idea of Delta Debugging for input trees.

        Repeatedly walks the parse tree and tries to shrink every visited
        subtree, first by replacing it with one of its own descendants of
        the same symbol, then by switching to another grammar expansion
        built from existing descendants. Sweeps are repeated until one
        complete sweep makes no replacement.

        :param concrete_input: input to reduce; defaults to the input given
            to the constructor.
        :return: the reduced input string (also stored in
            ``self.concrete_input``).
        """
        self.logger.info("PHASE START: Reduction à la Delta Debugging.")

        if concrete_input is None:
            concrete_input = self.concrete_input

        parse_tree = list(EarleyParser(self.grammar).parse(concrete_input))[0]

        did_generalize = True
        while did_generalize:
            # Every sweep starts at the first child of the root.
            paths = [[0]]
            did_generalize = False

            while paths:
                path = paths.pop()

                new_tree = self._replace_by_descendant(parse_tree, path)
                if new_tree is None:
                    new_tree = self._replace_by_other_expansion(parse_tree, path)

                if new_tree is not None:
                    parse_tree = new_tree
                    self.logger.info(f"Current input: {tree_to_string(parse_tree)}")
                    did_generalize = True
                    # The replaced subtree is revisited in the next sweep.
                    continue

                node, children = Reducer.get_subtree(parse_tree, path)
                self.logger.info(f"Cannot generalize path {path}, "
                                 f"expression {tree_to_string((node, children))}")
                for index, child in enumerate(children or []):
                    if child[0] in self.grammar:
                        paths.append(path + [index])

        self.logger.info("PHASE END: Reduction à la Delta Debugging.")
        self.concrete_input = tree_to_string(parse_tree)
        return self.concrete_input

    def _descendant_paths(self, children, path, symbol):
        """Return absolute paths of subtrees rooted at `symbol` below `path`.

        `children` are the children of the subtree located at `path`. Each
        child is searched with `find_paths`, and the paths found (relative
        to that child) are prefixed with `path` and the child's index.
        """
        found = []
        for index, child in enumerate(children):
            for relative in self.find_paths(child, lambda p, t: t[0] == symbol):
                found.append(path + [index] + relative)
        return found

    def _replace_by_descendant(self, parse_tree, path):
        """Step 1: try to replace the subtree at `path` by one of its subtrees.

        Returns the new tree for the first candidate that still fails the
        test, or None if no candidate does.
        """
        node, children = Reducer.get_subtree(parse_tree, path)
        for candidate_path in self._descendant_paths(children, path, node):
            subtree = Reducer.get_subtree(parse_tree, candidate_path)
            candidate = Reducer.replace_subtree(parse_tree, subtree, path)
            if self.test_input(tree_to_string(candidate)):
                self.logger.info(f"Generalizing path {path}, "
                                 f"replacing {tree_to_string((node, children))} "
                                 f"with {tree_to_string(subtree)}")
                return candidate
        return None

    def _replace_by_other_expansion(self, parse_tree, path):
        """Step 2: try another grammar expansion with fewer nodes.

        For every alternative expansion of the symbol at `path`, each
        nonterminal is instantiated with an existing descendant of the same
        symbol. Candidates that do not have strictly fewer nodes than the
        current subtree are skipped. Returns the new tree for the first
        candidate that still fails the test, or None.

        Returning right after the first success matters: all candidate
        paths were computed against `parse_tree` and are no longer valid
        once a replacement has been made.
        """
        node, children = Reducer.get_subtree(parse_tree, path)
        current_size = Reducer.count_nodes((node, children))
        current_expansion = [child[0] for child in children]

        for expansion in self.grammar.get(node, []):
            # Expansions may be given as (string, options) pairs.
            if isinstance(expansion, tuple):
                expansion = expansion[0]
            elements = [part for part in re.split(RE_NONTERMINAL, expansion) if part]
            if elements == current_expansion:
                continue

            # Find matching subtrees for each nonterminal
            candidates_per_nonterminal = [
                self._descendant_paths(children, path, element)
                for element in elements if is_nonterminal(element)
            ]

            # What itertools.product does:
            # [[1, 2], [3, 4, 5]] -> [[1, 3], [1, 4], [1, 5], [2, 3], ...]
            # Try all instantiations
            for combination in itertools.product(*candidates_per_nonterminal):
                chosen = iter(combination)
                new_children = [
                    Reducer.get_subtree(parse_tree, next(chosen))
                    if is_nonterminal(element) else (element, [])
                    for element in elements
                ]
                subtree = (node, new_children)

                if current_size <= Reducer.count_nodes(subtree):
                    continue

                candidate = Reducer.replace_subtree(parse_tree, subtree, path)
                if self.test_input(tree_to_string(candidate)):
                    self.logger.info(f"Generalizing path {path}, "
                                     f"replacing {tree_to_string((node, children))} "
                                     f"with {tree_to_string(subtree)}")
                    return candidate
        return None

    def test_input(self, concrete_input):
        """Run the test on `concrete_input`.

        :return: True if the test raised an instance of
            ``self.expected_exc_type``; False if it passed or raised any
            other ``Exception``.
        """
        self.logger.debug(f"Testing {repr(concrete_input)}...")
        try:
            self.test(concrete_input)
        except Exception as exc:
            if isinstance(exc, self.expected_exc_type):
                self.logger.debug(f"FAIL ({type(exc).__name__})")
                return True
            self.logger.info(f"UNEXPECTED FAIL ({type(exc).__name__})")
            return False
        self.logger.debug("PASS")
        return False

    def find_paths(self, tree, predicate, path=None):
        """Return a list of all paths for which `predicate` holds.

        `predicate` is a function `predicate`(`path`, `tree`), where `path`
        denotes a subtree in `tree`. If `predicate()` returns True, `path`
        is included in the returned list and the search does not descend
        below it. Only children whose symbol is a key of the grammar are
        searched.

        Taken from the Debugging Book.
        """
        if path is None:
            path = []

        symbol, children = Reducer.get_subtree(tree, path)
        if predicate(path, (symbol, children)):
            return [path]

        paths = []
        for index, child in enumerate(children or []):
            if child[0] in self.grammar:
                paths += self.find_paths(tree, predicate, path + [index])
        return paths

    @staticmethod
    def get_subtree(tree, path):
        """Access a subtree based on `path` (a list of children numbers)"""
        for index in path:
            _, children = tree
            tree = children[index]
        return tree

    @staticmethod
    def replace_subtree(tree, replacement_tree, path):
        """Return a new tree with `replacement_tree` inserted at `path`.

        The input tree is not modified; the child lists along `path` are
        rebuilt.
        """
        if not path:
            return replacement_tree

        node, children = tree
        head = path[0]
        replaced = Reducer.replace_subtree(children[head], replacement_tree, path[1:])
        return node, children[:head] + [replaced] + children[head + 1:]

    @staticmethod
    def dfs(tree, action=print):
        """Call `action` on `tree` and then on each of its subtrees, depth first."""
        _, children = tree
        action(tree)
        # Treat a children value of None like an empty child list.
        for child in children or []:
            Reducer.dfs(child, action)

    @staticmethod
    def count_nodes(parse_tree):
        """Return the number of nodes in `parse_tree`, including its root."""
        _, children = parse_tree
        return 1 + sum(Reducer.count_nodes(child) for child in children or [])
The method read_csv_file reads a CSV file into a custom data structure. If one line has more fields than the header, an IndexError is thrown.
Thanks very much for this super quick reply! I did not know about GrammarReducer; it could have saved me some work. Unfortunately, I can't get it to work as expected for my CSV example: the output I get is unreduced and, on top of that, contains nonterminal symbols. I just filed a bug report for the Fuzzing Book: uds-se/fuzzingbook#96
For my work on learning symbolic inputs, I wrote a variant of delta debugging which reduces parse trees based on an existing grammar. It works quite well and reasonably fast so far. I do not know whether it is relevant for the Debugging Book, and the code is quite lengthy and not didactically optimized, but thought I'd share it here anyway.
The code implementing the reduction follows below; after it, I'll also post an example that processes CSV.
The method `read_csv_file` reads a CSV file into a custom data structure. If one line has more fields than the header, an IndexError is thrown. It can be used as shown below:
The text was updated successfully, but these errors were encountered: