From 7297a5436378775d5d3270109e3f57c400c2c7c9 Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Tue, 20 Oct 2020 17:07:56 -0400 Subject: [PATCH] Allow specifying a set of errnos to select from. Allow setting a set of errnos rather than just a particular errno or a random one from the entire set. Update the cookbook for random faults to exclude any errnos passed via extra arguments. My objective here is to be able to exclude a specific errno from random injection. The Go runtime gets confused by EAGAINs, which cause it to epoll_wait on the file descriptor. I'd like to exclude EAGAIN from the set of injected errors for my use case. --- cookbook/recipes.py | 24 ++++++++++-------- python_client.py | 4 +-- server.cc | 60 ++++++++++++++++++++++++++------------------- server.thrift | 6 ++--- 4 files changed, 54 insertions(+), 40 deletions(-) diff --git a/cookbook/recipes.py b/cookbook/recipes.py index 4cc5afe..9922f92 100644 --- a/cookbook/recipes.py +++ b/cookbook/recipes.py @@ -36,7 +36,7 @@ def connect(): def main(): - if len(sys.argv) != 2: + if len(sys.argv) < 2: usage() client = connect() @@ -48,31 +48,35 @@ def main(): sys.exit(0) elif sys.argv[1] == "--full": print("Simulating disk full") - client.set_all_fault(False, errno.ENOSPC, 0, "", False, 0, False) + client.set_all_fault(False, [errno.ENOSPC], 0, "", False, 0, False) elif sys.argv[1] == "--io-error": print("Simulating IO error") - client.set_all_fault(False, errno.EIO, 0, "", False, 0, False) + client.set_all_fault(False, [errno.EIO], 0, "", False, 0, False) elif sys.argv[1] == "--quota": print("Simulating quota exceeded") - client.set_all_fault(False, errno.EDQUOT, 0, "", False, 0, False) + client.set_all_fault(False, [errno.EDQUOT], 0, "", False, 0, False) elif sys.argv[1] == "--delay": print("Simulating delayed IO") - client.set_all_fault(False, 0, 0, "", False, 50000, False) + client.set_all_fault(False, [], 0, "", False, 50000, False) elif sys.argv[1] == "--random": + # Use all errnos, minus any 
specified in trailing arguments. + errnos_selected = {code: name for code, name in + errno.errorcode.items() if name not in sys.argv[1:]} print("Simulating random errno") - client.set_all_fault(True, 0, 0, "", False, 0, False) + print("Using errnos: " + ", ".join(errnos_selected.values())) + client.set_all_fault(True, errnos_selected.keys(), 0, "", False, 0, False) elif sys.argv[1] == "--specific-syscalls": print("Restricting random IO restricted to specific syscalls") - client.set_fault(['read', 'read_buf', 'write', 'write_buf'], True, 0, 0, "", False, 0, False) + client.set_fault(['read', 'read_buf', 'write', 'write_buf'], True, [], 0, "", False, 0, False) elif sys.argv[1] == "--probability": print("Restricting random IO restricted to specific syscalls and 1% error probability") - client.set_fault(['read', 'read_buf', 'write', 'write_buf'], True, 0, 1000, "", False, 0, False) + client.set_fault(['read', 'read_buf', 'write', 'write_buf'], True, [], 1000, "", False, 0, False) elif sys.argv[1] == "--file-pattern": print("Restricting random IO restricted to specific syscalls while cursing *.sendmail.cf") - client.set_fault(['read', 'read_buf', 'write', 'write_buf'], True, 0, 0, ".*sendmail.cf", False, 0, False) + client.set_fault(['read', 'read_buf', 'write', 'write_buf'], True, [], 0, ".*sendmail.cf", False, 0, False) elif sys.argv[1] == "--broken-drive": print("The agonising drive simulator") - client.set_all_fault(False, errno.EIO, 100, "", False, 100000, False) + client.set_all_fault(False, [errno.EIO], 100, "", False, 100000, False) else: usage() diff --git a/python_client.py b/python_client.py index 8d32281..28689c9 100644 --- a/python_client.py +++ b/python_client.py @@ -19,8 +19,8 @@ print(client.get_methods()) - # client.set_fault(['flush', 'fsync', 'fsyncdir'], False, 0, 100000, "", True, 500000) - client.set_fault(['flush', 'fsync', 'fsyncdir'], False, 0, 99000, "", True, 500000) + # client.set_fault(['flush', 'fsync', 'fsyncdir'], False, [], 100000, "", 
True, 500000) + client.set_fault(['flush', 'fsync', 'fsyncdir'], False, [], 99000, "", True, 500000) # client.clear_all_faults() except Thrift.TException as tx: diff --git a/server.cc b/server.cc index 69fb209..e735741 100644 --- a/server.cc +++ b/server.cc @@ -42,19 +42,28 @@ using namespace ::apache::thrift::server; using std::shared_ptr; struct fault_descriptor { - bool random; // error code must be randomized - int err_no; // error code to return - int32_t probability; // 0 < probability < 100 - std::string regexp; // regular expression on filename - bool kill_caller; // Must we kill the caller - int32_t delay_us; // operation delay in us - bool auto_delay; // must auto delay like an SSD + bool random; // error code must be randomized + std::vector<int32_t> err_nos; // error code(s) to select from + int32_t probability; // 0 < probability < 100 + std::string regexp; // regular expression on filename + bool kill_caller; // Must we kill the caller + int32_t delay_us; // operation delay in us + bool auto_delay; // must auto delay like an SSD }; +std::vector<int32_t> default_errnos; // fallback errno pool, filled by init_default_errnos() std::set valid_methods; std::map fault_map; std::mutex mutex; +void init_default_errnos() +{ + // 'errno' is a macro from <cerrno>, so the loop uses its own variable; EXFULL is included to match the old inclusive range + for (int32_t err = E2BIG; err <= EXFULL; err++) { + default_errnos.push_back(err); + } +} + void init_valid_methods() { valid_methods.insert("getattr"); @@ -104,13 +113,16 @@ static bool is_valid_method(std::string method) } // return a random err_no -static int random_err_no() +static int random_err_no(std::vector<int32_t> err_nos) { - std::random_device rd; - std::uniform_int_distribution dist(E2BIG, EXFULL); + if (err_nos.empty()) { // no explicit errnos given: fall back to the default pool + err_nos = default_errnos; + } - return dist(rd); -} + std::random_device rd; + std::uniform_int_distribution<size_t> dist(0, err_nos.size() - 1); // both bounds are inclusive + return int(err_nos[dist(rd)]); +} // return true if random number is not in the probability static bool get_lucky(int probability) @@ -125,7 +137,7 @@ static bool get_lucky(int probability) if (dist(rd) > probability) { return true; 
} - + return false; } @@ -139,16 +151,13 @@ int error_inject(volatile int in_flight, std::string path, std::string method) return 0; } - // get the fault injection descritor + // get the fault injection descriptor auto descr = fault_map[method]; - int err_no = 0; - // get the err_no to inject - if (descr.err_no) { - err_no = descr.err_no; - } else if (descr.random) { - err_no = random_err_no(); + int err_no = 0; + if (descr.random || !descr.err_nos.empty()) { + err_no = random_err_no(descr.err_nos); } if (descr.regexp.size()) { @@ -208,14 +217,14 @@ class server_handler: public serverIf { } void set_fault(const std::vector& methods, const bool random, - const int32_t err_no, const int32_t probability, + const std::vector & err_nos, const int32_t probability, const std::string& regexp, const bool kill_caller, int32_t delay_us, const bool auto_delay) { struct fault_descriptor descr; descr.random = random; - descr.err_no = err_no; + descr.err_nos = err_nos; descr.probability = probability; descr.regexp = regexp; descr.kill_caller = kill_caller; @@ -230,7 +239,7 @@ class server_handler: public serverIf { } } - void set_all_fault(const bool random, const int32_t err_no, + void set_all_fault(const bool random, const std::vector & err_nos, const int32_t probability, const std::string& regexp, const bool kill_caller, const int32_t delay_us, const bool auto_delay) @@ -240,8 +249,8 @@ class server_handler: public serverIf { for (auto method: valid_methods) { methods.push_back(method); } - - set_fault(methods, random, err_no, probability, + + set_fault(methods, random, err_nos, probability, regexp, kill_caller, delay_us, auto_delay); } @@ -253,6 +262,7 @@ void server_thread() int port = 9090; init_valid_methods(); + init_default_errnos(); std::cout << "Server Thread started" << std::endl; try { diff --git a/server.thrift b/server.thrift index 8d52986..0fa249d 100644 --- a/server.thrift +++ b/server.thrift @@ -7,7 +7,7 @@ service server { // Used to get the list of availables 
systems calls - list get_methods(), + list get_methods(), // Used to clear all faults sources void clear_all_faults(), @@ -18,7 +18,7 @@ service server { // Set fault on a specific list of methods void set_fault(list methods, // the list of methods to operate on bool random, // Must we return random errno - i32 err_no, // A specific errno to return + list err_nos, // A list of specific errnos to select from i32 probability, // Fault probability over 100 000 string regexp, // A regexp matching a victim file bool kill_caller, // Kill -9 the caller process @@ -27,7 +27,7 @@ service server { // Works like set_fault but applies the fault to all methods void set_all_fault(bool random, - i32 err_no, + list err_nos, i32 probability, string regexp, bool kill_caller,