diff --git a/.github/workflows/push-rockspec.yml b/.github/workflows/push-rockspec.yml index 42aa673..f3340cc 100644 --- a/.github/workflows/push-rockspec.yml +++ b/.github/workflows/push-rockspec.yml @@ -10,9 +10,9 @@ env: jobs: pack-and-push-tagged-rockspec: - runs-on: ubuntu-latest - if: startsWith(github.ref, 'refs/tags') - steps: + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags') + steps: - uses: actions/checkout@master - uses: tarantool/setup-tarantool@v1 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..094017c --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,61 @@ +name: Run unit tests + +on: + push: + +env: + ROCK_NAME: config + +jobs: + run-luacheck: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - uses: tarantool/setup-tarantool@v2 + with: + tarantool-version: '2.10.7' + - name: install luacheck 0.26.0 + run: tarantoolctl rocks install luacheck 0.26.0 + - name: run luacheck + run: .rocks/bin/luacheck . + run-unit-tests: + runs-on: ubuntu-latest + strategy: + matrix: + version: ["1.10.15", "2.8.4", "2.10.6", "2.10.7-gc64-amd64", "2.11.0", "2.11.1"] + steps: + - uses: actions/checkout@master + - uses: docker/setup-buildx-action@v2 + - name: run test suite for ${{matrix.version}} + run: make test-${{matrix.version}} + - name: rename luacov.stats.out + run: mv luacov.stats.out luacov.stats.out-${{matrix.version}} + - uses: actions/upload-artifact@master + with: + name: luacov.stats.out + path: luacov.stats.out-${{matrix.version}} + run-coverage-report: + runs-on: ubuntu-latest + needs: ["run-unit-tests"] + steps: + - uses: actions/checkout@master + - uses: tarantool/setup-tarantool@v2 + with: + tarantool-version: '2.10.7' + - name: install luacov-coveralls 0.2.3 + run: tarantoolctl rocks install --server=https://luarocks.org luacov-coveralls 0.2.3 + - name: install luacov-console 1.2.0 + run: tarantoolctl rocks --server http://moonlibs.github.io/rocks install luacov-console 1.2.0 + - uses: actions/download-artifact@master + with: + name: luacov.stats.out + - name: debug + run: ls -la . + - name: merge luacov.stats.out + run: cat luacov.stats.out-* | >luacov.stats.out tarantool -e 'm={} for k in io.lines() do local vs=io.read():split(" ") vs[#vs]=nil local r = m[k] if r then for i, v in pairs(vs) do r[i]=r[i]+v end else m[k]=vs end end; for k, v in pairs(m) do print(k) print(table.concat(v, " ")) end' + - name: prepare coverage report + run: .rocks/bin/luacov-console . 
&& .rocks/bin/luacov-console -s + - name: publish coveralls report + env: + COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} + run: .rocks/bin/luacov-coveralls -v diff --git a/.luacov b/.luacov new file mode 100644 index 0000000..130ed1c --- /dev/null +++ b/.luacov @@ -0,0 +1,23 @@ +runreport = false +deletestats = false + +exclude = { + "spec/", + "test/", + "test_peek", + "%.rocks/", + "builtin/", +} + +pathcorrect = { + { "^/source/config/", "" }, +} + +coveralls = { + root = "/", + debug = true, + pathcorrect = { + { "^/home/runner/work/config/config/", "" }, + { "^/source/config", "" }, + }, +} \ No newline at end of file diff --git a/Dockerfile.build b/Dockerfile.build new file mode 100644 index 0000000..ca03b26 --- /dev/null +++ b/Dockerfile.build @@ -0,0 +1,6 @@ +FROM tarantool/tarantool:2.10 as builder +RUN apk add -u git cmake make gcc musl-dev curl wget +WORKDIR /root +RUN tarantoolctl rocks install luatest scm-1 +RUN tarantoolctl rocks install luacov-console 1.1.0 +RUN tarantoolctl rocks --server https://moonlibs.github.io/rocks install package-reload scm-1 diff --git a/Dockerfile.test b/Dockerfile.test index adfd643..a19e475 100644 --- a/Dockerfile.test +++ b/Dockerfile.test @@ -1,5 +1,8 @@ -FROM tarantool/tarantool:2.10 +ARG IMAGE=1.10.14 +FROM config-test-builder as builder -RUN apk add -u git cmake make gcc musl-dev -RUN tarantoolctl rocks install luatest 0.5.7 -RUN tarantoolctl rocks install luacov-console +FROM tarantool/tarantool:${IMAGE} + +WORKDIR /root +COPY --from=builder /root/.rocks /root/.rocks +WORKDIR /opt/tarantool diff --git a/Makefile b/Makefile index 778c58e..f4de20f 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,21 @@ .PHONY := all test -run-compose: - make -C test run-compose +run-etcd: + make -C test run-compose-etcd -build-testing-image: - docker build -t config-test -f Dockerfile.test . +config-test-builder: + docker build -t config-test-builder -f Dockerfile.build . -test: build-testing-image run-compose - docker run --name config-test \ +config-test-%: config-test-builder run-etcd + docker build -t $(@) --build-arg IMAGE=$(subst config-test-,,$@) -f Dockerfile.test . + +test-%: config-test-% + docker run --name $(<) \ --net tt_net \ -e TT_ETCD_ENDPOINTS="http://etcd0:2379,http://etcd1:2379,http://etcd2:2379" \ --rm -v $$(pwd):/source/config \ -v $$(pwd)/data:/tmp/ \ --workdir /source/config \ --entrypoint '' \ - config-test \ + $(<) \ ./run_test_in_docker.sh diff --git a/config.lua b/config.lua index 7f68529..5ffb25c 100644 --- a/config.lua +++ b/config.lua @@ -1,12 +1,20 @@ +---@diagnostic disable: inject-field local log = require 'log' +if log.new then + log = log.new('moonlibs.config') +end local fio = require 'fio' local json = require 'json'.new() local yaml = require 'yaml'.new() local digest = require 'digest' local fiber = require 'fiber' +local clock = require 'clock' json.cfg{ encode_invalid_as_nil = true } yaml.cfg{ encode_use_tostring = true } +---Retrieves all upvalues of given function and returns them as kv-map +---@param fun fun() +---@return table variables local function lookaround(fun) local vars = {} local i = 1 @@ -16,14 +24,26 @@ local function lookaround(fun) vars[n] = v i = i + 1 end - i = 1 - return vars, i - 1 + return vars end +---@private +---@class moonlibs.config.reflect_internals +---@field dynamic_cfg table +---@field default_cfg table +---@field upgrade_cfg? fun(cfg: table, translate_cfg: table): table +---@field template_cfg? table +---@field translate_cfg? table +---@field log? 
table + + +---Unwraps box.cfg and retrieves dynamic_cfg, default_cfg tables +---@return moonlibs.config.reflect_internals local function reflect_internals() local peek = { dynamic_cfg = {}; + default_cfg = {}; upgrade_cfg = true; translate_cfg = true; template_cfg = true; @@ -46,7 +66,12 @@ local function reflect_internals() error(string.format("Neither function nor callable argument %s after steps: %s", peekf, table.concat(steps, ", "))) end - local vars, _ = lookaround(peekf) + local vars = lookaround(peekf) + if type(vars.default_cfg) == 'table' then + for k in pairs(vars.default_cfg) do + peek.default_cfg[k] = vars.default_cfg[k] + end + end if allow_unwrap and (vars.orig_cfg or vars.origin_cfg) then -- It's a wrap of tarantoolctl/tt, unwrap and repeat peekf = (vars.orig_cfg or vars.origin_cfg) @@ -96,6 +121,19 @@ local function reflect_internals() end end break; + elseif vars.reload_cfg then + table.insert(steps,"reload_cfg") + peekf = vars.reload_cfg + elseif vars.reconfig_modules then + table.insert(steps,"reconfig_modules") + for k in pairs(peek) do + if peek[k] == true then + if vars[k] ~= nil then + peek[k] = vars[k] + end + end + end + peekf = vars.reconfig_modules else for k,v in pairs(vars) do log.info("var %s=%s",k,v) end error(string.format("Bad vars for %s after steps: %s", peekf, table.concat(steps, ", "))) @@ -110,7 +148,11 @@ end local load_cfg = reflect_internals() --- TODO: suppress deprecation +---Filters only valid keys from given cfg +--- +---Edits given cfg and returns only clear config +---@param cfg table +---@return table local function prepare_box_cfg(cfg) -- 1. take config, if have upgrade, upgrade it if load_cfg.upgrade_cfg then @@ -297,6 +339,7 @@ local function toboolean(v) return false end +---@type table local master_selection_policies; master_selection_policies = { ['etcd.instance.single'] = function(M, instance_name, common_cfg, instance_cfg, cluster_cfg, local_cfg) @@ -359,8 +402,10 @@ master_selection_policies = { if cluster_cfg.master == instance_name then log.info("Instance is declared as cluster master, set read_only=false") cfg.box.read_only = false - cfg.box.replication_connect_quorum = 1 - cfg.box.replication_connect_timeout = 1 + if cfg.box.bootstrap_strategy ~= 'auto' then + cfg.box.replication_connect_quorum = 1 + cfg.box.replication_connect_timeout = 1 + end else log.info("Cluster has another master %s, not me %s, set read_only=true", cluster_cfg.master, instance_name) cfg.box.read_only = true @@ -458,6 +503,17 @@ local function gen_cluster_uuid(cluster_name) error("Can't generate uuid for cluster "..cluster_name, 2) end +---@class moonlibs.config.opts.etcd:moonlibs.config.etcd.opts +---@field instance_name string Mandatory name of the instance +---@field prefix string Mandatory prefix inside etcd tree +---@field uuid? 'auto' When auto config generates replicaset_uuid and instance_uuid for nodes +---@field fixed? 
table Optional ETCD tree + +---Loads configuration from etcd and evaluate master_selection_policy +---@param M moonlibs.config +---@param etcd_conf moonlibs.config.opts.etcd +---@param local_cfg table +---@return table local function etcd_load( M, etcd_conf, local_cfg ) local etcd @@ -538,19 +594,13 @@ local function etcd_load( M, etcd_conf, local_cfg ) print("Loaded config from etcd",yaml.encode(all_cfg)) end local common_cfg = all_cfg.common - -- local common_cfg = etcd:get_common() local all_instances_cfg = all_cfg.instances - -- local all_instances_cfg = etcd:get_instances() local instance_cfg = all_instances_cfg[instance_name] assert(instance_cfg,"Instance name "..instance_name.." is not known to etcd") - -- local all_clusters_cfg = etcd:get_clusters() local all_clusters_cfg = all_cfg.clusters or all_cfg.shards - -- print(yaml.encode(all_clusters_cfg)) - - local master_selection_policy local cluster_cfg if instance_cfg.cluster or local_cfg.cluster then @@ -614,13 +664,14 @@ local function etcd_load( M, etcd_conf, local_cfg ) "Start instance "..cfg.box.listen, " with replication:"..table.concat(cfg.box.replication,", "), string.format("timeout: %s, quorum: %s, lag: %s", - cfg.box.replication_connect_timeout or 'def:30', + cfg.box.replication_connect_timeout + or ('def:%s'):format(load_cfg.default_cfg.replication_connect_timeout or 30), cfg.box.replication_connect_quorum or 'def:full', - cfg.box.replication_sync_lag or 'def:10' + cfg.box.replication_sync_lag + or ('def:%s'):format(load_cfg.default_cfg.replication_sync_lag or 10) ) ) end - --end end -- print(yaml.encode(cfg)) @@ -661,9 +712,58 @@ local function optimal_rcq(upstreams) return rcq end +local function do_cfg(boxcfg, cfg) + for key, val in pairs(cfg) do + if load_cfg.default_cfg[key] == nil and load_cfg.dynamic_cfg[key] == nil then + local warn = string.format("Dropping non-boxcfg option '%s' given '%s'",key,val) + log.warn("%s",warn) + print(warn) + cfg[key] = nil + end + end + log.info("Just before box.cfg %s", yaml.encode(cfg)) + boxcfg(cfg) +end + + +---@class moonlibs.config.opts +---@field bypass_non_dynamic? boolean (default: true) drops every changed non-dynamic option on reconfiguration +---@field tidy_load? boolean (default: true) recoveries tarantool with read_only=true +---@field mkdir? boolean (default: false) should moonlibs/config create memtx_dir and wal_dir +---@field etcd? moonlibs.config.opts.etcd [legacy] configuration of etcd +---@field default_replication_connect_timeout? number (default: 1.1) default RCT in seconds +---@field default_election_mode? election_mode (default: candidate) option is respected only when etcd.cluster.raft is used +---@field default_synchro_quorum? string|number (default: 'N/2+1') option is respected only when etcd.cluster.raft is used +---@field default_read_only? boolean (default: false) option is respected only when etcd.instance.read_only is used (deprecated) +---@field master_selection_policy? 'etcd.cluster.master'|'etcd.cluster.vshard'|'etcd.cluster.raft'|'etcd.instance.single' master selection policy +---@field strict_mode? boolean (default: false) stricts config retrievals. if key is not found config.get will raise an exception +---@field strict? boolean (default: false) stricts config retrievals. if key is not found config.get will raise an exception +---@field default? table (default: nil) globally default options for config.get +---@field on_load? 
fun(conf: moonlibs.config, cfg: table) callback which is called every time config is loaded from file and ETCD +---@field load? fun(conf: moonlibs.config, cfg: table): table do not use this callback +---@field on_before_cfg? fun(conf: moonlibs.config, cfg: table) callback is called right before running box.cfg (but after on_load) +---@field boxcfg? fun(cfg: table) [legacy] when provided this function will be called instead box.cfg. tidy_load and everything else will not be used. +---@field wrap_box_cfg? fun(cfg: table) callback is called instead box.cfg. But tidy_load is respected. Use this, if you need to proxy every option to box.cfg on application side +---@field on_after_cfg? fun(conf: moonlibs.config, cfg: table) callback which is called after full tarantool configuration + +---@class moonlibs.config: moonlibs.config.opts +---@field etcd moonlibs.config.etcd +---@field public _load_cfg table +---@field public _flat table +---@field public _fencing_f? Fiber +---@field public _enforced_ro? boolean +---@operator call(moonlibs.config.opts): moonlibs.config + +---@type moonlibs.config local M M = setmetatable({ console = {}; + ---Retrieves value from config + ---@overload fun(k: string, def: any?): any? + ---@param self moonlibs.config + ---@param k string path inside config + ---@param def? any optional default value + ---@return any? get = function(self,k,def) if self ~= M then def = k @@ -680,8 +780,22 @@ local M return end end - end + end, + enforce_ro = function() + if not M._ro_enforcable then + return false, 'cannot enforce readonly' + end + M._enforced_ro = true + return true, { + info_ro = box.info.ro, + cfg_ro = box.cfg.read_only, + enforce_ro = M._enforced_ro, + } + end, },{ + ---Reinitiates moonlibs.config + ---@param args moonlibs.config.opts + ---@return moonlibs.config __call = function(_, args) -- args MUST belong to us, because of modification local file @@ -707,6 +821,7 @@ local M M.master_selection_policy = args.master_selection_policy M.default = args.default M.strict_mode = args.strict_mode or args.strict or false + M._load_cfg = load_cfg -- print("config", "loading ",file, json.encode(args)) if not file then file = get_opt() @@ -774,8 +889,12 @@ local M -- subject to change, just a PoC local etcd_conf = args.etcd or cfg.etcd + -- we can enforce ro during recovery only if we have etcd config + M._ro_enforcable = M._ro_enforcable and etcd_conf ~= nil if etcd_conf then + local s = clock.time() cfg = etcd_load(M, etcd_conf, cfg) + log.info("etcd_load took %.3fs", clock.time()-s) end if args.load then @@ -786,6 +905,10 @@ local M error("No box.* config given", 2) end + if cfg.box.remote_addr then + cfg.box.remote_addr = nil + end + if args.bypass_non_dynamic then cfg.box = prepare_box_cfg(cfg.box) end @@ -796,10 +919,6 @@ local M cfg.sys.boxcfg = nil cfg.sys.on_load = nil - -- if not cfg.box.custom_proc_title and args.instance_name then - -- cfg.box.custom_proc_title = args.instance_name - -- end - -- latest modifications and fixups if args.on_load then args.on_load(M,cfg) @@ -807,7 +926,10 @@ local M return cfg end - local cfg = load_config() + -- We cannot enforce ro if any of theese conditions not satisfied + -- Tarantool must be bootstraping with tidy_load and do not overwraps personal boxcfg + M._ro_enforcable = args.boxcfg == nil and args.tidy_load and type(box.cfg) == 'function' + local cfg = load_config() --[[@as table]] M._flat = flatten(cfg) @@ -836,16 +958,10 @@ local M end end - if cfg.box.remote_addr then - cfg.box.remote_addr = nil - end - - - -- 
print(string.format("Starting app: %s", yaml.encode(cfg.box))) - local boxcfg + local boxcfg = box.cfg if args.boxcfg then - args.boxcfg( cfg.box ) + do_cfg(args.boxcfg, cfg.box) else if args.wrap_box_cfg then boxcfg = args.wrap_box_cfg @@ -861,7 +977,7 @@ local M snap_dir = "." end end - local bootstrapped = false + local bootstrapped for _,v in pairs(fio.glob(snap_dir..'/*.snap')) do bootstrapped = v end @@ -870,13 +986,15 @@ local M print("Have etcd, use tidy load") local ro = cfg.box.read_only cfg.box.read_only = true - if not ro then - -- Only if node should be master - cfg.box.replication_connect_quorum = 1 - cfg.box.replication_connect_timeout = M.default_replication_connect_timeout - elseif not cfg.box.replication_connect_quorum then - -- For replica tune up to N/2+1 - cfg.box.replication_connect_quorum = optimal_rcq(cfg.box.replication) + if cfg.box.bootstrap_strategy ~= 'auto' then + if not ro then + -- Only if node should be master + cfg.box.replication_connect_quorum = 1 + cfg.box.replication_connect_timeout = M.default_replication_connect_timeout + elseif not cfg.box.replication_connect_quorum then + -- For replica tune up to N/2+1 + cfg.box.replication_connect_quorum = optimal_rcq(cfg.box.replication) + end end log.info("Start tidy loading with ro=true%s rcq=%s rct=%s (snap=%s)", ro ~= true and string.format(' (would be %s)',ro) or '', @@ -884,21 +1002,41 @@ local M bootstrapped ) else - if not cfg.box.replication_connect_quorum then - cfg.box.replication_connect_quorum = optimal_rcq(cfg.box.replication) + -- not bootstraped yet cluster + + -- if cfg.box.bootstrap_strategy == 'auto' then -- ≥ Tarantool 2.11 + -- local ro = cfg.box.read_only + -- local is_candidate = cfg.box.election_mode == 'candidate' + -- if not ro and not is_candidate then + -- -- master but not Raft/candidate + -- -- we decrease replication for master, + -- -- to allow him bootstrap himself + -- cfg.box.replication = {cfg.box.remote_addr or cfg.box.listen} + -- end + if cfg.box.bootstrap_strategy ~= 'auto' then -- < Tarantool 2.11 + if cfg.box.replication_connect_quorum == nil then + cfg.box.replication_connect_quorum = optimal_rcq(cfg.box.replication) + end end + log.info("Start non-bootstrapped tidy loading with ro=%s rcq=%s rct=%s (dir=%s)", - cfg.box.read_only, cfg.box.replication_connect_quorum, cfg.box.replication_connect_timeout, snap_dir) + cfg.box.read_only, cfg.box.replication_connect_quorum, + cfg.box.replication_connect_timeout, snap_dir + ) end end - log.info("Just before box.cfg %s", yaml.encode( cfg.box )) - - ;(boxcfg or box.cfg)( cfg.box ) + do_cfg(boxcfg, cfg.box) log.info("Reloading config after start") local new_cfg = load_config() + if M._enforced_ro then + log.info("Enforcing RO (should be ro=%s) because told to", new_cfg.box.read_only) + new_cfg.box.read_only = true + end + M._enforced_ro = nil + M._ro_enforcable = false local diff_box = value_diff(cfg.box, new_cfg.box) -- since load_config loads config also for reloading it removes non-dynamic options @@ -916,14 +1054,14 @@ local M if diff_box then log.info("Reconfigure after load with %s",require'json'.encode(diff_box)) - ;(boxcfg or box.cfg)(diff_box) + do_cfg(boxcfg, diff_box) else log.info("Config is actual after load") end M._flat = flatten(new_cfg) else - (boxcfg or box.cfg)( cfg.box ) + do_cfg(boxcfg, cfg.box) end else local replication = cfg.box.replication_source or cfg.box.replication @@ -935,12 +1073,12 @@ local M cfg.box.replication = nil cfg.box.replication_source = nil - (boxcfg or box.cfg)( cfg.box ) + do_cfg(boxcfg, 
cfg.box) cfg.box.replication = r cfg.box.replication_source = rs else - (boxcfg or box.cfg)( cfg.box ) + do_cfg(boxcfg, cfg.box) end end end @@ -953,7 +1091,7 @@ local M local msp = config.get('sys.master_selection_policy') if type(cfg.etcd) == 'table' and config.get('etcd.fencing_enabled') - and msp == 'etcd.cluster.master' + and (msp == 'etcd.cluster.master' or msp == 'etcd.cluster.vshard') and type(cfg.cluster) == 'string' and cfg.cluster ~= '' and config.get('etcd.reduce_listing_quorum') ~= true then @@ -982,9 +1120,9 @@ local M local etcd_cluster, watch_index - local function refresh_list() + local function refresh_list(opts) local s = fiber.time() - local result, resp = config.etcd:list(watch_path) + local result, resp = config.etcd:list(watch_path, opts) local elapsed = fiber.time()-s log.verbose("[fencing] list(%s) => %s in %.3fs %s", @@ -1005,6 +1143,7 @@ local M local function fencing_check(deadline) -- we can only allow half of the time till deadline local timeout = math.min((deadline-fiber.time())*0.5, fencing_pause) + log.verbose("[wait] timeout:%.3fs FP:%.3fs", timeout, fencing_pause) local check_started = fiber.time() local pcall_ok, err_or_resolution, new_cluster = pcall(function() @@ -1049,6 +1188,7 @@ local M end end) + log.verbose("[wait] took:%.3fs exp:%.3fs", fiber.time()-check_started, timeout) if not in_my_gen() then return end if not pcall_ok then @@ -1060,8 +1200,10 @@ local M end if not new_cluster then + local list_started = fiber.time() + log.verbose("[listing] left:%.3fs", deadline-fiber.time()) repeat - local ok, e_cluster = pcall(refresh_list) + local ok, e_cluster = pcall(refresh_list, {deadline = deadline}) if ok and e_cluster then new_cluster = e_cluster break @@ -1072,6 +1214,8 @@ local M local sleep = math.min(fencing_pause, 0.5*(deadline - fiber.time())) fiber.sleep(sleep) until fiber.time() > deadline + log.verbose("[list] took:%.3fs left:%.3fs", + fiber.time()-list_started, deadline-fiber.time()) end if not in_my_gen() then return end @@ -1081,7 +1225,8 @@ local M watch_path, fiber.time()-check_started, new_cluster) if not fencing_check_replication then - return false + -- ETCD is down, we do not know what is happening + return nil end -- In proper fencing we must step down immediately as soon as we discover @@ -1101,7 +1246,7 @@ local M ru.upstream.peer, ru.upstream.status, ru.upstream.message, ru.upstream.idle, ru.upstream.lag ) - return false + return nil end end end @@ -1113,16 +1258,19 @@ local M return true elseif new_cluster.switchover then -- new_cluster.master ~= my_name -- Another instance is the leader in ETCD. But we could be the one - -- who will be the next (cluster is under switching right now). + -- who is going to be the next (cluster is under switching right now). -- It is almost impossible to get this path in production. But the only one -- protection we have is `fencing_pause` and `fencing_timeout`. -- So, we will do nothing until ETCD mutex is present log.warn('[fencing] It seems that cluster is under switchover right now %s', json.encode(new_cluster)) - -- (if we are ro -- then we must end the loop) - -- (if we are rw -- then we must continue the loop) - return not box.info.ro + -- Note: this node was rw (otherwise we would not execute fencing_check at all) + -- During normal switch registered leader is RO (because we are RW, and we are not the leader) + -- And in the next step coordinator will update leader info in ETCD. 
+ -- so this condition seems to be unreachable for every node + return nil else log.warn('[fencing] ETCD %s/master is %s not us. Stepping down', watch_path, new_cluster.master) + -- ETCD is up, master is not us => we must step down immediately return false end end @@ -1156,22 +1304,22 @@ local M fiber.sleep(math.random(math.max(0.5, fencing_pause-0.5), fencing_pause+0.5)) end - log.info("etcd_cluster is %s (index: %s)", json.encode(etcd_cluster), watch_index) - if not in_my_gen() then return end - -- we yield to get next ev_run before get fiber.time() fiber.sleep(0) + if not in_my_gen() then return end + log.info("etcd_cluster is %s (index: %s)", json.encode(etcd_cluster), watch_index) + -- we will not step down until deadline. local deadline = fiber.time()+fencing_timeout repeat -- Before ETCD check we better pause -- we do a little bit randomized sleep to not spam ETCD - fiber.sleep( - math.random(0, - 0.1*math.min(deadline-fiber.time(),fencing_timeout-fencing_pause) - ) - ) + local hard_limit = deadline-fiber.time() + local soft_limit = fencing_timeout-fencing_pause + local rand_sleep = math.random()*0.1*math.min(hard_limit, soft_limit) + log.verbose("[sleep] hard:%.3fs soft:%.3fs sleep:%.3fs", hard_limit, soft_limit, rand_sleep) + fiber.sleep(rand_sleep) -- After each yield we have to check that we are still in our generation if not in_my_gen() then return end @@ -1181,18 +1329,25 @@ local M -- fencing_check(deadline) if it returns true, -- then we update leadership leasing - if fencing_check(deadline) then + local verdict = fencing_check(deadline) + log.verbose("[verdict:%s] Leasing ft:%.3fs up:%.3fs left:%.3fs", + verdict == true and "ok" + or verdict == false and "step" + or "unknown", + fencing_timeout, + verdict and (fiber.time()+fencing_timeout-deadline) or 0, + deadline - fiber.time() + ) + if verdict == false then + -- immediate stepdown + break + elseif verdict then -- update deadline. if deadline <= fiber.time() then log.warn("[fencing] deadline was overflowed deadline:%s, now:%s", deadline, fiber.time() ) end - log.verbose("[fencing] Leasing ft:%.3fs up:%.3fs left:%.3fs", - fencing_timeout, - fiber.time()+fencing_timeout-deadline, - deadline - fiber.time() - ) deadline = fiber.time()+fencing_timeout end if not in_my_gen() then return end diff --git a/config/etcd.lua b/config/etcd.lua index aa1eec5..6eac97d 100644 --- a/config/etcd.lua +++ b/config/etcd.lua @@ -1,9 +1,34 @@ local json = require 'json' local log = require 'log' +if log.new then + log = log.new('moonlibs.config') +end +local fiber = require 'fiber' +local clock = require 'clock' local http_client = require 'http.client' local digest = require 'digest' +---@class moonlibs.config.etcd.opts +---@field endpoints? string[] (default: {'http://127.0.0.1:4001','http://127.0.0.1:2379'}) list of clientURLs to etcd +---@field timeout? number (default: 1) timeout of request to each node to etcd +---@field boolean_auto? boolean (default: false) when true each string value `true`, `false` is converted to boolean value +---@field print_config? boolean (default: false) when true loaded configuration from etcd is printed out +---@field discover_endpoints? boolean (default: true) when false connector does not automatically discovers etcd endpoints +---@field reduce_listing_quorum? boolean (default: false) when true connector does not request etcd:list with quorum +---@field login? string allows to specify username for each request (Basic-auth) +---@field password? 
string allows to specify password for each request (Basic-auth) + +---@class moonlibs.config.etcd +---@field endpoints string[] (default: {'http://127.0.0.1:4001','http://127.0.0.1:2379'}) list of clientURLs to etcd +---@field client http +---@field timeout number (default: 1) timeout of request to each node to etcd +---@field boolean_auto? boolean (default: false) when true each string value `true`, `false` is converted to boolean value +---@field print_config? boolean (default: false) when true loaded configuration from etcd is printed out +---@field discover_endpoints boolean (default: true) when false connector does not automatically discovers etcd endpoints +---@field reduce_listing_quorum? boolean (default: false) when true connector does not request etcd:list with quorum +---@field authorization? string Authorization header for Basic-auth (is set only when login is present) +---@field headers? table headers which are provided on each request local M = {} M.err = {} @@ -28,6 +53,10 @@ function M.errstr(code) return M.err[ tonumber(code) ] or string.format("Unknown error %s",code) end +---Creates new etcd connector +---@param mod moonlibs.config.etcd +---@param options moonlibs.config.etcd.opts +---@return moonlibs.config.etcd function M.new(mod,options) local self = setmetatable({},{__index=mod}) self.endpoints = options.endpoints or {'http://127.0.0.1:4001','http://127.0.0.1:2379'} @@ -49,7 +78,10 @@ end setmetatable(M,{ __call = M.new }) +---Discovers every ETCD endpoint by requesting clientURLs (/v2/members) +--- ?: make it parallel function M:discovery() + local start_at = clock.time() local timeout = self.timeout or 1 local new_endpoints = {} local tried = {} @@ -82,18 +114,28 @@ function M:discovery() end end if #new_endpoints == 0 then - error("Failed to discover members "..table.concat(tried,", "),2) + error("Failed to discover members "..table.concat(tried,", ")..(" in %.3fs"):format(clock.time()-start_at),2) end if self.discover_endpoints then self.endpoints = new_endpoints table.insert(self.endpoints,table.remove(self.endpoints,1)) - log.info("discovered etcd endpoints "..table.concat(self.endpoints,", ")) + log.info("discovered etcd endpoints "..table.concat(self.endpoints,", ")..(" in %.3fs"):format(clock.time()-start_at)) else - log.info("hardcoded etcd endpoints "..table.concat(self.endpoints,", ")) + log.info("hardcoded etcd endpoints "..table.concat(self.endpoints,", ")..(" in %.3fs"):format(clock.time()-start_at)) end self.current = math.random(#self.endpoints) end +---@class moonlibs.etcd.request.opts +---@field deadline? number deadline of request (in seconds, fractional) +---@field timeout? number timeout of request to each node (in seconds, fractional) +---@field body? string request body (for PUT) + +---Performs etcd request +---@param method 'PUT'|'GET'|'DELETE'|'HEAD' http_method +---@param path string etcd path after /v2/ +---@param args? 
moonlibs.etcd.request.opts +---@return table, HTTPResponse function M:request(method, path, args ) -- path must be prefixed outside -- TODO: auth @@ -105,11 +147,14 @@ function M:request(method, path, args ) table.insert(query, '=') table.insert(query, tostring(v)) end + else + args = {} end local qs if #query > 0 then qs = '?'..table.concat(query) else qs = '' end local body = args and args.body or '' local lasterror, lastresponse + local deadline = args.deadline local len = #self.endpoints for i = 0, len - 1 do @@ -119,9 +164,24 @@ function M:request(method, path, args ) end local uri = string.format("%s/v2/%s%s", self.endpoints[cur], path, qs ) -- print("[debug] "..uri) - local x = self.client.request(method,uri,body,{timeout = args.timeout or self.timeout or 1; headers = self.headers}) + local request_timeout = args.timeout or self.timeout or 1 + if deadline then + request_timeout = math.min(deadline-fiber.time(), request_timeout) + end + local s = clock.time() + local x = self.client.request(method,uri,body,{timeout = request_timeout; headers = self.headers}) lastresponse = x local status,reply = pcall(json.decode,x and x.body) + local logger = log.verbose + if x.status >= 500 then + logger = log.error + end + logger("%s %s (to:%.3fs) finished with %s%s %s (in %.3fs)", + method, uri, request_timeout, x.status, + status and reply and reply.errorCode and (reply.message or M.err[reply.errorCode] or reply.errorCode), + (x.headers or {})['X-Etcd-Index'], + clock.time()-s + ) -- 408 for timeout if x.status < 500 and x.status ~= 408 then @@ -139,6 +199,10 @@ function M:request(method, path, args ) lasterror = { errorCode = 500, message = x.reason } end end + + if deadline and deadline < fiber.time() then + break + end end return lasterror, lastresponse end @@ -181,8 +245,26 @@ function M:recursive_extract(cut, node, storage) if not storage then return _storage[''] end end -function M:list(keyspath) - local res, response = self:request("GET","keys"..keyspath, { recursive = true, quorum = not self.reduce_listing_quorum }) +---@class moonlibs.config.etcd.list.opts:moonlibs.etcd.request.opts +---@field recursive? boolean (default: true) should listing be recursive +---@field quorum? boolean (default: not reduce_listing_quorum) when true requests quorum read + +---Performs listing by given path +---@param keyspath string path inside etcd +---@param opts moonlibs.config.etcd.list.opts +---@return unknown +---@return HTTPResponse +function M:list(keyspath, opts) + if type(opts) ~= 'table' then + opts = {} + end + if opts.recursive == nil then + opts.recursive = true + end + if opts.quorum == nil then + opts.quorum = not self.reduce_listing_quorum + end + local res, response = self:request("GET","keys"..keyspath, opts) -- print(yaml.encode(res)) if res.node then local result = self:recursive_extract(keyspath,res.node) @@ -197,6 +279,14 @@ function M:list(keyspath) end end +---@class moonlibs.config.etcd.wait.opts +---@field timeout? 
number (default: etcd.timeout) timeout for each node to await changes +---@field index number etcd-index that should be awaited + +---Awaits any change in subtree recursively +---@param keyspath string +---@param args moonlibs.config.etcd.wait.opts +---@return boolean not_timed_out, HTTPResponse function M:wait(keyspath, args) args = args or {} local _, response = self:request("GET","keys"..keyspath, { diff --git a/run_test_in_docker.sh b/run_test_in_docker.sh index 30b170e..d8bd305 100755 --- a/run_test_in_docker.sh +++ b/run_test_in_docker.sh @@ -2,4 +2,5 @@ pwd rm -rf /root/.cache/ -.rocks/bin/luatest --coverage -c -v spec/01_single_test.lua +cp -ar /root/.rocks /source/config/ +/source/config/.rocks/bin/luatest --coverage -v spec/ diff --git a/spec/01_single_test.lua b/spec/01_single_test.lua index f82c152..52020c4 100644 --- a/spec/01_single_test.lua +++ b/spec/01_single_test.lua @@ -1,14 +1,7 @@ local t = require 'luatest' --[[@as luatest]] local uri = require 'uri' -local base_config = { - apps = { - single = { - common = { box = { log_level = 4 } }, - } - } -} - +---@class test.config.single:luatest.group local g = t.group('single', { { instances = {single = '127.0.0.1:3301'}, run = {'single'} }, { @@ -27,41 +20,33 @@ local fio = require 'fio' local root = fio.dirname(this_file) local init_lua = fio.pathjoin(root, 'mock', 'single', 'init.lua') -local base_env = { - TT_WAL_DIR = nil, -- will be set at before_each trigger - TT_MEMTX_DIR = nil, -- will be set at before_each trigger - TT_ETCD_PREFIX = '/apps/single', - TT_CONFIG = fio.pathjoin(root, 'mock', 'single', 'conf.lua'), - TT_MASTER_SELECTION_POLICY = 'etcd.instance.single', - TT_ETCD_ENDPOINTS = os.getenv('TT_ETCD_ENDPOINTS') or "http://127.0.0.1:2379", -} +local base_env local h = require 'spec.helper' local test_ctx = {} -local working_dir - -g.before_each(function() - working_dir = h.create_workdir() +g.before_each(function(cg) + local working_dir = h.create_workdir() + base_env = { + TT_ETCD_PREFIX = '/apps/single', + TT_CONFIG = fio.pathjoin(root, 'mock', 'single', 'conf.lua'), + TT_MASTER_SELECTION_POLICY = 'etcd.instance.single', + TT_ETCD_ENDPOINTS = os.getenv('TT_ETCD_ENDPOINTS') or "http://127.0.0.1:2379", + } base_env.TT_WAL_DIR = working_dir base_env.TT_MEMTX_DIR = working_dir -end) - -g.after_each(function() - for _, info in pairs(test_ctx) do - for _, tt in pairs(info.tts) do - tt.tt:stop() - end - end + base_env.TT_WORK_DIR = working_dir - h.clean_directory(working_dir) + local base_config = { + apps = { + single = { + common = { box = { log_level = 1 } }, + } + } + } h.clear_etcd() -end) -function g.test_run_instances(cg) local params = cg.params - local this_ctx = { tts = {} } - test_ctx[cg.name] = this_ctx local etcd_config = table.deepcopy(base_config) etcd_config.apps.single.instances = {} @@ -69,39 +54,41 @@ function g.test_run_instances(cg) etcd_config.apps.single.instances[instance_name] = { box = { listen = listen_uri } } end + local ctx = { tts = {}, env = base_env, etcd_config = etcd_config, params = cg.params } + test_ctx[cg.name] = ctx + h.upload_to_etcd(etcd_config) +end) - for _, name in ipairs(params.run) do - local env = table.deepcopy(base_env) - env.TT_INSTANCE_NAME = name - local net_box_port = tonumber(uri.parse(etcd_config.apps.single.instances[name].box.listen).service) - - local tt = h.start_tarantool({ - alias = name, - env = env, - command = init_lua, - args = {}, - net_box_port = net_box_port, - }) - - table.insert(this_ctx.tts, { - tt = tt, - net_box_port = net_box_port, - env = env, 
- name = name, - }) +g.after_each(function() + for _, info in pairs(test_ctx) do + for _, tt in pairs(info.tts) do + tt.server:stop() + end + h.clean_directory(info.env.TT_WAL_DIR) + h.clean_directory(info.env.TT_MEMTX_DIR) + h.clean_directory(info.env.TT_WORK_DIR) end - for _, tt in ipairs(this_ctx.tts) do - tt.tt:connect_net_box() - local box_cfg = tt.tt:get_box_cfg() + h.clear_etcd() +end) + +function g.test_run_instances(cg) + local ctx = test_ctx[cg.name] + + -- Start tarantools + h.start_all_tarantools(ctx, init_lua, root, ctx.etcd_config.apps.single.instances) + + for _, tt in ipairs(ctx.tts) do + tt.server:connect_net_box() + local box_cfg = tt.server:get_box_cfg() t.assert_covers(box_cfg, { - log_level = etcd_config.apps.single.common.box.log_level, - listen = etcd_config.apps.single.instances[tt.name].box.listen, + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tt.name].box.listen, read_only = false, }, 'box.cfg is correct') - local conn = tt.tt --[[@as luatest.server]] + local conn = tt.server --[[@as luatest.server]] local ret = conn:exec(function() local r = table.deepcopy(config.get('sys')) for k, v in pairs(r) do @@ -119,14 +106,15 @@ function g.test_run_instances(cg) }, 'get("sys") has correct fields') end - for _, tt in ipairs(this_ctx.tts) do - local conn = tt.tt --[[@as luatest.server]] + -- restart tarantools + for _, tt in ipairs(ctx.tts) do + local conn = tt.server --[[@as luatest.server]] h.restart_tarantool(conn) - local box_cfg = tt.tt:get_box_cfg() + local box_cfg = tt.server:get_box_cfg() t.assert_covers(box_cfg, { - log_level = etcd_config.apps.single.common.box.log_level, - listen = etcd_config.apps.single.instances[tt.name].box.listen, + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tt.name].box.listen, read_only = false, }, 'box.cfg is correct after restart') diff --git a/spec/02_cluster_master_test.lua b/spec/02_cluster_master_test.lua new file mode 100644 index 0000000..db3877c --- /dev/null +++ b/spec/02_cluster_master_test.lua @@ -0,0 +1,299 @@ +local t = require 'luatest' --[[@as luatest]] +local uuid = require 'uuid' +local fiber = require 'fiber' + +---@class test.config.master:luatest.group +local g = t.group('master', { + { + cluster = 'single', + master = 'first_01', + instances = {first_01 = '127.0.0.1:3301', first_02 = '127.0.0.1:3302'}, + run = {'first_01', 'first_02'} + }, + { + cluster = 'single', + master = 'second_01', + instances = {second_01 = '127.0.0.1:3301', second_02 = '127.0.0.1:3302'}, + run = {'second_01'} + }, + { + cluster = 'single', + master = 'third_01', + instances = {third_01 = '127.0.0.1:3301', third_02 = '127.0.0.1:3302',third_03='127.0.0.1:3303'}, + run = {'third_03','third_02','third_01'} + }, +}) + +local this_file = debug.getinfo(1, "S").source:sub(2) +local fio = require 'fio' + +local root = fio.dirname(this_file) +local init_lua = fio.pathjoin(root, 'mock', 'single', 'init.lua') + +local base_env + +local h = require 'spec.helper' + +---@class moonlibs.config.test.tarantool +---@field server luatest.server +---@field net_box_port number +---@field env table +---@field name string + +---@class moonlibs.config.test.context +---@field tts moonlibs.config.test.tarantool[] +---@field env table +---@field etcd_config table +---@field params table + +---@type table +local test_ctx = {} + +g.before_each(function(cg) + local working_dir = h.create_workdir() + base_env = { + TT_ETCD_PREFIX = 
'/apps/single', + TT_CONFIG = fio.pathjoin(root, 'mock', 'single', 'conf.lua'), + TT_MASTER_SELECTION_POLICY = 'etcd.cluster.master', + TT_ETCD_ENDPOINTS = os.getenv('TT_ETCD_ENDPOINTS') or "http://127.0.0.1:2379", + } + + base_env.TT_WAL_DIR = working_dir + base_env.TT_MEMTX_DIR = working_dir + + local base_config = { + apps = { + single = { + common = { + etcd = { fencing_enabled = true }, + box = { log_level = 5 }, + }, + clusters = { + single = { + master = cg.params.master, + replicaset_uuid = uuid.str(), + } + }, + } + }, + } + h.clear_etcd() + + local etcd_config = table.deepcopy(base_config) + etcd_config.apps.single.instances = {} + for instance_name, listen_uri in pairs(cg.params.instances) do + etcd_config.apps.single.instances[instance_name] = { + box = { listen = listen_uri }, + cluster = cg.params.cluster, + } + end + + local this_ctx = { tts = {}, env = base_env, etcd_config = etcd_config, params = cg.params } + test_ctx[cg.name] = this_ctx + + h.upload_to_etcd(etcd_config) +end) + +g.after_each(function() + for _, info in pairs(test_ctx) do + for _, tt in pairs(info.tts) do + tt.server:stop() + end + h.clean_directory(info.env.TT_WAL_DIR) + h.clean_directory(info.env.TT_MEMTX_DIR) + end + + h.clear_etcd() +end) + +function g.test_run_instances(cg) + local ctx = test_ctx[cg.name] + + -- Start tarantools + h.start_all_tarantools(ctx, init_lua, root, ctx.etcd_config.apps.single.instances) + + -- Check configuration + for _, tnt in ipairs(ctx.tts) do + tnt.server:connect_net_box() + local box_cfg = tnt.server:get_box_cfg() + t.assert_covers(box_cfg, { + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tnt.name].box.listen, + read_only = ctx.etcd_config.apps.single.clusters.single.master ~= tnt.name, + }, 'box.cfg is correct') + + local conn = tnt.server --[[@as luatest.server]] + local ret = conn:exec(function() + local r = table.deepcopy(config.get('sys')) + for k, v in pairs(r) do + if type(v) == 'function' then + r[k] = nil + end + end + return r + end) + + t.assert_covers(ret, { + instance_name = tnt.name, + master_selection_policy = 'etcd.cluster.master', + file = base_env.TT_CONFIG, + }, 'get("sys") has correct fields') + end + + -- restart+check configuration + for _, tt in ipairs(ctx.tts) do + h.restart_tarantool(tt.server) + + local box_cfg = tt.server:get_box_cfg() + t.assert_covers(box_cfg, { + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tt.name].box.listen, + read_only = ctx.etcd_config.apps.single.clusters.single.master ~= tt.name, + }, 'box.cfg is correct after restart') + + local ret = tt.server:exec(function() + local r = table.deepcopy(config.get('sys')) + for k, v in pairs(r) do + if type(v) == 'function' then + r[k] = nil + end + end + return r + end) + + t.assert_covers(ret, { + instance_name = tt.name, + master_selection_policy = 'etcd.cluster.master', + file = base_env.TT_CONFIG, + }, 'get("sys") has correct fields after restart') + end +end + +function g.test_reload(cg) + local ctx = test_ctx[cg.name] + + -- Start tarantools + h.start_all_tarantools(ctx, init_lua, root, ctx.etcd_config.apps.single.instances) + + -- reload+check configuration + for _, tt in ipairs(ctx.tts) do + h.reload_tarantool(tt.server) + + local box_cfg = tt.server:get_box_cfg() + t.assert_covers(box_cfg, { + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tt.name].box.listen, + read_only = 
ctx.etcd_config.apps.single.clusters.single.master ~= tt.name, + }, 'box.cfg is correct after restart') + + local ret = tt.server:exec(function() + local r = table.deepcopy(config.get('sys')) + for k, v in pairs(r) do + if type(v) == 'function' then + r[k] = nil + end + end + return r + end) + + t.assert_covers(ret, { + instance_name = tt.name, + master_selection_policy = 'etcd.cluster.master', + file = base_env.TT_CONFIG, + }, 'get("sys") has correct fields after restart') + end +end + +function g.test_fencing(cg) + local ctx = test_ctx[cg.name] + t.skip_if(not ctx.etcd_config.apps.single.common.etcd.fencing_enabled, "fencing disabled") + + -- Start tarantools + h.start_all_tarantools(ctx, init_lua, root, ctx.etcd_config.apps.single.instances) + + -- Check configuration + for _, tnt in ipairs(ctx.tts) do + tnt.server:connect_net_box() + local box_cfg = tnt.server:get_box_cfg() + t.assert_covers(box_cfg, { + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tnt.name].box.listen, + read_only = ctx.etcd_config.apps.single.clusters.single.master ~= tnt.name, + }, 'box.cfg is correct') + + local conn = tnt.server --[[@as luatest.server]] + local ret = conn:exec(function() + local r = table.deepcopy(config.get('sys')) + for k, v in pairs(r) do + if type(v) == 'function' then + r[k] = nil + end + end + return r + end) + + t.assert_covers(ret, { + instance_name = tnt.name, + master_selection_policy = 'etcd.cluster.master', + file = base_env.TT_CONFIG, + }, 'get("sys") has correct fields') + end + + local master_name = ctx.params.master + + ---@type moonlibs.config.test.tarantool + local master + for _, tt in ipairs(ctx.tts) do + if tt.name == master_name then + master = tt + break + end + end + + t.assert(master, 'master is not connected') + + local ret = master.server:exec(function() + return { cfg_ro = box.cfg.read_only, ro = box.info.ro } + end) + + t.assert_equals(ret.cfg_ro, false, 'box.cfg.read_only == false (before fencing)') + t.assert_equals(ret.ro, false, 'box.info.ro == false (before fencing)') + + ctx.etcd_config.apps.single.clusters.single.master = 'not_exists' + h.upload_to_etcd(ctx.etcd_config) + + local fencing_cfg = ctx.etcd_config.apps.single.common.etcd + local fencing_timeout = fencing_cfg.fencing_timeout or 10 + local fencing_pause = fencing_cfg.fencing_pause or fencing_timeout/2 + + t.helpers.retrying({ + timeout = fencing_pause, + delay = 0.1, + }, function () + local ret = master.server:exec(function() + return { cfg_ro = box.cfg.read_only, ro = box.info.ro } + end) + assert(ret.cfg_ro, "cfg.read_only must be true") + assert(ret.ro, "info.ro must be true") + end) + + local ret = master.server:exec(function() + return { cfg_ro = box.cfg.read_only, ro = box.info.ro } + end) + + t.assert_equals(ret.cfg_ro, true, 'box.cfg.read_only == true') + t.assert_equals(ret.ro, true, 'box.info.ro == true') + + ctx.etcd_config.apps.single.clusters.single.master = master_name + h.upload_to_etcd(ctx.etcd_config) + + local deadline = 2*fencing_timeout+fiber.time() + while fiber.time() < deadline do + local ret2 = master.server:exec(function() + return { cfg_ro = box.cfg.read_only, ro = box.info.ro } + end) + + t.assert_equals(ret2.cfg_ro, true, 'box.cfg.read_only == true (double check)') + t.assert_equals(ret2.ro, true, 'box.info.ro == true (double check)') + end +end diff --git a/spec/helper.lua b/spec/helper.lua index a9c6743..c66bf92 100644 --- a/spec/helper.lua +++ b/spec/helper.lua @@ -2,11 +2,11 @@ local h = {} local t = 
require 'luatest' --[[@as luatest]] local fio = require 'fio' local log = require 'log' +local uri = require 'uri' local fun = require 'fun' local clock = require 'clock' local fiber = require 'fiber' -local http = require 'http.client' -local json = require 'json' +local json = require 'json' ---Creates temporary working directory ---@return string @@ -69,7 +69,8 @@ function h.clear_etcd() local etcd = h.get_etcd() local _, res = etcd:request('DELETE', 'keys/apps', { recursive = true, dir = true, force = true }) - assert(res.status >= 200 and res.status < 300, ("%s %s"):format(res.status, res.body)) + log.info("clear_etcd(%s) => %s:%s", '/apps', res.status, res.reason) + assert(res.status >= 200 and(res.status < 300 or res.status == 404), ("%s %s"):format(res.status, res.body)) end function h.upload_to_etcd(tree) @@ -80,58 +81,94 @@ function h.upload_to_etcd(tree) table.sort(keys) for _, key in ipairs(keys) do do - local _, res = etcd:request('PUT', 'keys'..key, { value = flat[key] }) - log.info(res) + local _, res = etcd:request('PUT', 'keys'..key, { value = flat[key], quorum = true }) assert(res.status < 300 and res.status >= 200, res.reason) end end local key = keys[1]:match('^(/[^/]+)') - log.info((etcd:list(key))) + log.info("list(%s): => %s", key, json.encode(etcd:list(key))) end ---Starts new tarantool server ---@param opts luatest.server.options ---@return luatest.server function h.start_tarantool(opts) - log.info(opts) + log.info("starting tarantool %s", json.encode(opts)) local srv = t.Server:new(opts) srv:start() local process = srv.process - local deadline = clock.time() + 15 + local deadline = clock.time() + 30 while clock.time() < deadline do - fiber.sleep(3) - assert(process:is_alive(), "tarantool is dead") + fiber.sleep(0.1) + if process:is_alive() then break end + end + return srv +end - if pcall(function() srv:connect_net_box() end) then - break - end +function h.start_all_tarantools(ctx, init_lua, root, instances) + for _, name in ipairs(ctx.params.run) do + local env = table.deepcopy(ctx.env) + env.TT_INSTANCE_NAME = name + local net_box_port = tonumber(uri.parse(instances[name].box.listen).service) + + local tt = h.start_tarantool({ + alias = name, + env = env, + command = init_lua, + args = {}, + net_box_port = net_box_port, + workdir = root, + }) + + table.insert(ctx.tts, { + server = tt, + net_box_port = net_box_port, + env = env, + name = name, + }) end - return srv + for _, tt in ipairs(ctx.tts) do + h.wait_tarantool(tt.server) + end end ----comment ----@param conn luatest.server -function h.restart_tarantool(conn) - conn:stop() +---@param srv luatest.server +function h.wait_tarantool(srv) + t.helpers.retrying({ timeout = 30, delay = 0.1 }, function () + srv:connect_net_box() + srv:call('box.info') + end) +end + +---@param server luatest.server +function h.restart_tarantool(server) + server:stop() local deadline = clock.time() + 15 fiber.sleep(3) - conn:start() + server:start() while clock.time() < deadline do fiber.sleep(3) - assert(conn.process:is_alive(), "tarantool is dead") + assert(server.process:is_alive(), "tarantool is dead") - if pcall(function() conn:connect_net_box() end) then + if pcall(function() server:connect_net_box() end) then break end end end +---@param server luatest.server +function h.reload_tarantool(server) + server:exec(function() + package.reload() + end) +end + return h diff --git a/spec/mock/single/init.lua b/spec/mock/single/init.lua index 172fa2f..379c061 100644 --- a/spec/mock/single/init.lua +++ b/spec/mock/single/init.lua @@ -1,9 
+1,9 @@ #!/usr/bin/env tarantool - +require 'package.reload' require 'config' { mkdir = true, print_config = true, - instance_name = os.getenv('TT_INSTANCE_NAME') or 'single', + instance_name = os.getenv('TT_INSTANCE_NAME'), file = os.getenv('TT_CONFIG'), master_selection_policy = os.getenv('TT_MASTER_SELECTION_POLICY'), on_after_cfg = function() diff --git a/test/Dockerfile b/test/Dockerfile index d88f4e9..a5549e2 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,6 +1,7 @@ -FROM tarantool/tarantool:1.10.14 +FROM tarantool/tarantool:2.11.1 RUN apk add --no-cache -u iproute2 make bind-tools WORKDIR /opt/tarantool +RUN tarantoolctl rocks --global --server http://moonlibs.github.io/rocks install package-reload scm-1 CMD ["tarantool" "/opt/tarantool/init.lua"] diff --git a/test/Makefile b/test/Makefile index 4fff538..437a918 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,3 +1,6 @@ -run-compose: +run-compose-etcd: + docker compose up -d --remove-orphans --build etcd0 etcd1 etcd2 + +run-compose: run-compose-etcd docker compose up -d --remove-orphans --build diff --git a/test/app/conf.lua b/test/app/conf.lua index 7456fbb..1e53e8d 100644 --- a/test/app/conf.lua +++ b/test/app/conf.lua @@ -2,18 +2,8 @@ etcd = { instance_name = os.getenv("TT_INSTANCE_NAME"), prefix = '/instance', endpoints = {"http://etcd0:2379","http://etcd1:2379","http://etcd2:2379"}, - fencing_enabled = true, -} - -box = { - background = false, - log_level = 6, - log_format = 'plain', - - memtx_dir = '/var/lib/tarantool/snaps/', - wal_dir = '/var/lib/tarantool/xlogs', -} - -app = { - + fencing_enabled = false, + timeout = 2, + login = 'username', + password = 'password', } diff --git a/test/app/init.lua b/test/app/init.lua index 295817d..bd23cde 100644 --- a/test/app/init.lua +++ b/test/app/init.lua @@ -1,8 +1,9 @@ local fiber = require "fiber" +require 'package.reload' + require 'config' { mkdir = true, - print_config = true, instance_name = os.getenv("TT_INSTANCE_NAME"), file = 'conf.lua', master_selection_policy = 'etcd.cluster.master', @@ -30,9 +31,10 @@ fiber.create(function() for _ = 1, 10 do local f = fiber.create(function() fiber.self():set_joinable(true) - for i = 1, 100 do - box.space.T:replace{i, box.info.id, box.info.vclock} + for _ = 1, 10 do + box.space.T:insert{box.space.T:len(), box.info.id, box.info.vclock} end + fiber.sleep(0.001) end) table.insert(fibers, f) end diff --git a/test/docker-compose.yml b/test/docker-compose.yml index b37455a..1cb72cc 100644 --- a/test/docker-compose.yml +++ b/test/docker-compose.yml @@ -13,23 +13,23 @@ x-etcd: &etcd networks: - tarantool -x-tt: &tt - build: . - volumes: - - $PWD/../:/opt/tarantool/.rocks/share/tarantool:ro - - $PWD/app:/opt/tarantool - - $PWD/net:/opt/tarantool/net:ro - depends_on: - etcd0: - condition: service_started - etcd1: - condition: service_started - etcd2: - condition: service_started - privileged: true - networks: - - tarantool - command: ["/bin/sh", "-c", "sleep 5 && tarantool /opt/tarantool/init.lua"] +# x-tt: &tt +# build: . 
+# volumes: +# - $PWD/../:/opt/tarantool/.rocks/share/tarantool:ro +# - $PWD/app:/opt/tarantool +# - $PWD/net:/opt/tarantool/net:ro +# depends_on: +# etcd0: +# condition: service_started +# etcd1: +# condition: service_started +# etcd2: +# condition: service_started +# privileged: true +# networks: +# - tarantool +# command: ["/bin/sh", "-c", "sleep 5 && tarantool /opt/tarantool/init.lua"] networks: tarantool: @@ -62,28 +62,28 @@ services: ETCD_ADVERTISE_CLIENT_URLS: http://etcd2:2379 ETCD_INITIAL_ADVERTISE_PEER_URLS: http://etcd2:2380 - etcd_load: - image: registry.gitlab.com/ochaton/switchover:010a6965 - networks: - - tarantool - volumes: - - $PWD/instance.etcd.yaml:/instance.etcd.yaml:ro - depends_on: - etcd0: - condition: service_started - etcd1: - condition: service_started - etcd2: - condition: service_started - entrypoint: [''] - command: ["/bin/sh", "-c", "sleep 3 && switchover -v -e http://etcd0:2379,http://etcd1:2379,http://etcd2:2379 etcd load / /instance.etcd.yaml"] - instance_01: - <<: *tt - container_name: instance_01 - environment: - TT_INSTANCE_NAME: instance_01 - instance_02: - <<: *tt - container_name: instance_02 - environment: - TT_INSTANCE_NAME: instance_02 + # etcd_load: + # image: registry.gitlab.com/ochaton/switchover:2.7.0.20 + # networks: + # - tarantool + # volumes: + # - $PWD/instance.etcd.yaml:/instance.etcd.yaml:ro + # depends_on: + # etcd0: + # condition: service_started + # etcd1: + # condition: service_started + # etcd2: + # condition: service_started + # entrypoint: [''] + # command: ["/bin/sh", "-c", "sleep 3 && switchover -v -e http://etcd0:2379,http://etcd1:2379,http://etcd2:2379 etcd load / /instance.etcd.yaml"] + # instance_01: + # <<: *tt + # container_name: instance_01 + # environment: + # TT_INSTANCE_NAME: instance_01 + # instance_02: + # <<: *tt + # container_name: instance_02 + # environment: + # TT_INSTANCE_NAME: instance_02 diff --git a/test/instance.etcd.yaml b/test/instance.etcd.yaml index d23f6ce..9261b50 100644 --- a/test/instance.etcd.yaml +++ b/test/instance.etcd.yaml @@ -9,18 +9,22 @@ instance: fencing_timeout: 10 fencing_pause: 5 box: - replication_connect_quorum: 1 + bootstrap_strategy: auto log_level: 5 + replication_connect_timeout: 3 + listen: 0.0.0.0:3301 memtx_memory: 268435456 + memtx_dir: /var/lib/tarantool/snaps/ + wal_dir: /var/lib/tarantool/xlogs/ instances: instance_01: cluster: instance box: instance_uuid: 91157a11-0000-0001-0000-000000000000 - listen: instance_01:3301 + remote_addr: instance_01:3301 instance_02: cluster: instance box: instance_uuid: 91157a11-0000-0002-0000-000000000000 - listen: instance_02:3302 + remote_addr: instance_02:3301 ...
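
For reference, the coverage-merge step added in .github/workflows/test.yml above packs the whole merge into a single tarantool -e one-liner. Below is a minimal readable sketch of the same logic, assuming the standard two-line luacov.stats.out record layout (a "<max_line>:<filename>" header followed by a line of space-separated hit counters with a trailing space) and Tarantool's built-in string :split() extension; the file name merge_luacov_stats.lua is illustrative only, not part of this change:

-- merge_luacov_stats.lua (hypothetical helper, equivalent to the workflow one-liner).
-- Reads concatenated luacov.stats.out-* files from stdin and writes merged stats to stdout.
local merged = {}

for header in io.lines() do             -- "<max_line>:<source file>" record header
    local counts = io.read():split(" ") -- per-line hit counters (Tarantool string.split)
    counts[#counts] = nil               -- drop the empty field left by the trailing space
    local acc = merged[header]
    if acc == nil then
        merged[header] = counts         -- first run that covered this file
    else
        for i, v in ipairs(counts) do   -- element-wise sum with an already-seen run
            acc[i] = acc[i] + v         -- numeric strings are coerced to numbers here
        end
    end
end

for header, counts in pairs(merged) do
    print(header)
    print(table.concat(counts, " "))
end

Invoked as "cat luacov.stats.out-* | tarantool merge_luacov_stats.lua > luacov.stats.out", this should produce the same merged stats file that the inline variant writes before luacov-console and luacov-coveralls run in the workflow.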