From caf83b22a57663eb62f88cfed64eff4f2c3bca53 Mon Sep 17 00:00:00 2001
From: Vladislav Grubov
Date: Sun, 20 Aug 2023 14:05:28 +0300
Subject: [PATCH 1/6] fix: config.etcd:list now allows passing a timeout

* Due to built-in retries on non-200 responses, config.etcd:list() could
  take N*timeout to complete instead of just timeout. Fencing is a
  time-bound algorithm, so listings should not overrun fencing_timeout.
---
 config.lua        | 63 ++++++++++++++++++++++++++++++-----------------
 config/etcd.lua   | 25 ++++++++++++++++---
 test/app/conf.lua |  1 +
 3 files changed, 63 insertions(+), 26 deletions(-)

diff --git a/config.lua b/config.lua
index 7f68529..8274153 100644
--- a/config.lua
+++ b/config.lua
@@ -982,9 +982,9 @@ local M

 	local etcd_cluster, watch_index

-	local function refresh_list()
+	local function refresh_list(opts)
 		local s = fiber.time()
-		local result, resp = config.etcd:list(watch_path)
+		local result, resp = config.etcd:list(watch_path, opts)
 		local elapsed = fiber.time()-s

 		log.verbose("[fencing] list(%s) => %s in %.3fs %s",
@@ -1005,6 +1005,7 @@ local M
 	local function fencing_check(deadline)
 		-- we can only allow half of the time till deadline
 		local timeout = math.min((deadline-fiber.time())*0.5, fencing_pause)
+		log.verbose("[wait] timeout:%.3fs FP:%.3fs", timeout, fencing_pause)

 		local check_started = fiber.time()
 		local pcall_ok, err_or_resolution, new_cluster = pcall(function()
@@ -1049,6 +1050,7 @@ local M
 			end
 		end)
+		log.verbose("[wait] took:%.3fs exp:%.3fs", fiber.time()-check_started, timeout)
 		if not in_my_gen() then return end

 		if not pcall_ok then
@@ -1060,8 +1062,10 @@ local M
 		end

 		if not new_cluster then
+			local list_started = fiber.time()
+			log.verbose("[listing] left:%.3fs", deadline-fiber.time())
 			repeat
-				local ok, e_cluster = pcall(refresh_list)
+				local ok, e_cluster = pcall(refresh_list, {deadline = deadline})
 				if ok and e_cluster then
 					new_cluster = e_cluster
 					break
@@ -1072,6 +1076,8 @@ local M
 				local sleep = math.min(fencing_pause, 0.5*(deadline - fiber.time()))
 				fiber.sleep(sleep)
 			until fiber.time() > deadline
+			log.verbose("[list] took:%.3fs left:%.3fs",
+				fiber.time()-list_started, deadline-fiber.time())
 		end
 		if not in_my_gen() then return end
@@ -1081,7 +1087,8 @@ local M
 			watch_path, fiber.time()-check_started, new_cluster)

 		if not fencing_check_replication then
-			return false
+			-- ETCD is down, we do not know what is happening
+			return nil
 		end

 		-- In proper fencing we must step down immediately as soon as we discover
@@ -1101,7 +1108,7 @@ local M
 						ru.upstream.peer, ru.upstream.status, ru.upstream.message,
 						ru.upstream.idle, ru.upstream.lag
 					)
-					return false
+					return nil
 				end
 			end
 		end
@@ -1113,16 +1120,19 @@ local M
 			return true
 		elseif new_cluster.switchover then -- new_cluster.master ~= my_name
 			-- Another instance is the leader in ETCD. But we could be the one
-			-- who will be the next (cluster is under switching right now).
+			-- who is going to be the next (cluster is under switching right now).
 			-- It is almost impossible to get this path in production. But the only one
 			-- protection we have is `fencing_pause` and `fencing_timeout`.
 			-- So, we will do nothing until ETCD mutex is present
 			log.warn('[fencing] It seems that cluster is under switchover right now %s',
 				json.encode(new_cluster))
-			-- (if we are ro -- then we must end the loop)
-			-- (if we are rw -- then we must continue the loop)
-			return not box.info.ro
+			-- Note: this node was rw (otherwise fencing_check would not be running at all).
+			-- During a normal switchover the registered leader is RO (we are RW and we are
+			-- not the leader), and in the next step the coordinator will update the leader
+			-- info in ETCD, so this branch seems to be unreachable for any node.
+			return nil
 		else
 			log.warn('[fencing] ETCD %s/master is %s not us. Stepping down',
 				watch_path, new_cluster.master)
+			-- ETCD is up, master is not us => we must step down immediately
 			return false
 		end
 	end
@@ -1156,22 +1166,22 @@ local M
 			fiber.sleep(math.random(math.max(0.5, fencing_pause-0.5), fencing_pause+0.5))
 		end

-		log.info("etcd_cluster is %s (index: %s)", json.encode(etcd_cluster), watch_index)
-		if not in_my_gen() then return end
-
 		-- we yield to get next ev_run before get fiber.time()
 		fiber.sleep(0)
+		if not in_my_gen() then return end
+		log.info("etcd_cluster is %s (index: %s)", json.encode(etcd_cluster), watch_index)

 		-- we will not step down until deadline.
 		local deadline = fiber.time()+fencing_timeout
 		repeat
 			-- Before ETCD check we better pause
 			-- we do a little bit randomized sleep to not spam ETCD
-			fiber.sleep(
-				math.random(0,
-					0.1*math.min(deadline-fiber.time(),fencing_timeout-fencing_pause)
-				)
-			)
+			local hard_limit = deadline-fiber.time()
+			local soft_limit = fencing_timeout-fencing_pause
+			local rand_sleep = math.random()*0.1*math.min(hard_limit, soft_limit)
+			log.verbose("[sleep] hard:%.3fs soft:%.3fs sleep:%.3fs", hard_limit, soft_limit, rand_sleep)
+			fiber.sleep(rand_sleep)

 			-- After each yield we have to check that we are still in our generation
 			if not in_my_gen() then return end
@@ -1181,18 +1191,25 @@ local M

 			-- fencing_check(deadline) if it returns true,
 			-- then we update leadership leasing
-			if fencing_check(deadline) then
+			local verdict = fencing_check(deadline)
+			log.verbose("[verdict:%s] Leasing ft:%.3fs up:%.3fs left:%.3fs",
+				verdict == true and "ok"
+				or verdict == false and "step"
+				or "unknown",
+				fencing_timeout,
+				verdict and (fiber.time()+fencing_timeout-deadline) or 0,
+				deadline - fiber.time()
+			)
+			if verdict == false then
+				-- immediate stepdown
+				break
+			elseif verdict then
+				-- update deadline.
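				-- (editor's note, not part of the original patch) a sketch of the
				-- tri-state verdict contract introduced here, as the caller treats it:
				--   true  -> we are still the registered master: renew the lease below
				--            (deadline = fiber.time() + fencing_timeout)
				--   false -> ETCD answered and names another master: break and step down now
				--   nil   -> ETCD is unreachable or the state is ambiguous: keep the
				--            current lease and re-check until the deadline expires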
 				if deadline <= fiber.time() then
 					log.warn("[fencing] deadline was overflowed deadline:%s, now:%s",
 						deadline, fiber.time()
 					)
 				end
-				log.verbose("[fencing] Leasing ft:%.3fs up:%.3fs left:%.3fs",
-					fencing_timeout,
-					fiber.time()+fencing_timeout-deadline,
-					deadline - fiber.time()
-				)
 				deadline = fiber.time()+fencing_timeout
 			end
 			if not in_my_gen() then return end
diff --git a/config/etcd.lua b/config/etcd.lua
index aa1eec5..1009032 100644
--- a/config/etcd.lua
+++ b/config/etcd.lua
@@ -1,5 +1,6 @@
 local json = require 'json'
 local log = require 'log'
+local fiber = require 'fiber'
 local http_client = require 'http.client'
 local digest = require 'digest'
@@ -110,6 +111,7 @@ function M:request(method, path, args )
 	if #query > 0 then qs = '?'..table.concat(query) else qs = '' end
 	local body = args and args.body or ''
 	local lasterror, lastresponse
+	local deadline = args.deadline

 	local len = #self.endpoints
 	for i = 0, len - 1 do
@@ -119,7 +121,11 @@ function M:request(method, path, args )
 		end
 		local uri = string.format("%s/v2/%s%s", self.endpoints[cur], path, qs )
 		-- print("[debug] "..uri)
-		local x = self.client.request(method,uri,body,{timeout = args.timeout or self.timeout or 1; headers = self.headers})
+		local request_timeout = args.timeout or self.timeout or 1
+		if deadline then
+			request_timeout = math.min(deadline-fiber.time(), request_timeout)
+		end
+		local x = self.client.request(method,uri,body,{timeout = request_timeout; headers = self.headers})
 		lastresponse = x

 		local status,reply = pcall(json.decode,x and x.body)
@@ -139,6 +145,10 @@ function M:request(method, path, args )
 				lasterror = { errorCode = 500, message = x.reason }
 			end
 		end
+
+		if deadline and deadline < fiber.time() then
+			break
+		end
 	end
 	return lasterror, lastresponse
 end
@@ -181,8 +191,17 @@ function M:recursive_extract(cut, node, storage)
 	if not storage then return _storage[''] end
 end

-function M:list(keyspath)
-	local res, response = self:request("GET","keys"..keyspath, { recursive = true, quorum = not self.reduce_listing_quorum })
+function M:list(keyspath, opts)
+	if type(opts) ~= 'table' then
+		opts = {}
+	end
+	if opts.recursive == nil then
+		opts.recursive = true
+	end
+	if opts.quorum == nil then
+		opts.quorum = not self.reduce_listing_quorum
+	end
+	local res, response = self:request("GET","keys"..keyspath, opts)
 	-- print(yaml.encode(res))
 	if res.node then
 		local result = self:recursive_extract(keyspath,res.node)
diff --git a/test/app/conf.lua b/test/app/conf.lua
index 7456fbb..d8cd24c 100644
--- a/test/app/conf.lua
+++ b/test/app/conf.lua
@@ -3,6 +3,7 @@ etcd = {
 	prefix = '/instance',
 	endpoints = {"http://etcd0:2379","http://etcd1:2379","http://etcd2:2379"},
 	fencing_enabled = true,
+	timeout = 2,
 }

 box = {

From ee687e48e5457f7ab3ca5bfaa4340691374f23ea Mon Sep 17 00:00:00 2001
From: Vladislav Grubov
Date: Sun, 10 Sep 2023 00:33:37 +0300
Subject: [PATCH 2/6] fix: fencing and reload in 2.11.*

* This patch introduces a basic test suite for the most common topologies:
  etcd.cluster.master and etcd.instance.single. The suite starts 1/2/3
  tarantool instances against 3 etcd nodes and checks that the resulting
  configuration is valid. It also checks that the config is reloadable.
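  As a usage sketch (assuming the Makefile targets added below), a single
  version can be exercised locally with `make test-2.11.1`: it builds the
  versioned test image through the `config-test-%` pattern rule and then
  runs luatest inside it against the compose-managed etcd cluster.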
  For now the test suite runs on 1.10.15, 2.8.4, 2.10.6, 2.11.0 and 2.11.1.
---
 .github/workflows/push-rockspec.yml |   6 +-
 .github/workflows/test.yml          |  19 +++
 Dockerfile.build                    |   6 +
 Dockerfile.test                     |  11 +-
 Makefile                            |  17 ++-
 config.lua                          |  16 +++
 config/etcd.lua                     |   8 +-
 run_test_in_docker.sh               |   3 +-
 spec/01_single_test.lua             | 114 ++++++++---------
 spec/02_cluster_master_test.lua     | 190 ++++++++++++++++++++++++++++
 spec/helper.lua                     |  79 +++++++++---
 spec/mock/single/init.lua           |   4 +-
 test/Dockerfile                     |   3 +-
 test/Makefile                       |   5 +-
 test/app/init.lua                   |   1 +
 test/docker-compose.yml             |  84 ++++++------
 16 files changed, 418 insertions(+), 148 deletions(-)
 create mode 100644 .github/workflows/test.yml
 create mode 100644 Dockerfile.build
 create mode 100644 spec/02_cluster_master_test.lua

diff --git a/.github/workflows/push-rockspec.yml b/.github/workflows/push-rockspec.yml
index 42aa673..f3340cc 100644
--- a/.github/workflows/push-rockspec.yml
+++ b/.github/workflows/push-rockspec.yml
@@ -10,9 +10,9 @@ env:

 jobs:
   pack-and-push-tagged-rockspec:
-      runs-on: ubuntu-latest
-      if: startsWith(github.ref, 'refs/tags')
-      steps:
+    runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags')
+    steps:
     - uses: actions/checkout@master
     - uses: tarantool/setup-tarantool@v1
       with:
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..792c8cc
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,19 @@
+name: Create and push rockspec for moonlibs/config
+
+on:
+  push:
+
+env:
+  ROCK_NAME: config
+
+jobs:
+  test-matrix:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        version: ["1.10.15", "2.8.4", "2.10.6", "2.11.0", "2.11.1"]
+    steps:
+      - uses: actions/checkout@master
+      - uses: docker/setup-buildx-action@v2
+      - name: run test suite for ${{matrix.version}}
+        run: make test-${{matrix.version}}
\ No newline at end of file
diff --git a/Dockerfile.build b/Dockerfile.build
new file mode 100644
index 0000000..ca03b26
--- /dev/null
+++ b/Dockerfile.build
@@ -0,0 +1,6 @@
+FROM tarantool/tarantool:2.10 as builder
+RUN apk add -u git cmake make gcc musl-dev curl wget
+WORKDIR /root
+RUN tarantoolctl rocks install luatest scm-1
+RUN tarantoolctl rocks install luacov-console 1.1.0
+RUN tarantoolctl rocks --server https://moonlibs.github.io/rocks install package-reload scm-1
diff --git a/Dockerfile.test b/Dockerfile.test
index adfd643..a19e475 100644
--- a/Dockerfile.test
+++ b/Dockerfile.test
@@ -1,5 +1,8 @@
-FROM tarantool/tarantool:2.10
+ARG IMAGE=1.10.14
+FROM config-test-builder as builder

-RUN apk add -u git cmake make gcc musl-dev
-RUN tarantoolctl rocks install luatest 0.5.7
-RUN tarantoolctl rocks install luacov-console
+FROM tarantool/tarantool:${IMAGE}
+
+WORKDIR /root
+COPY --from=builder /root/.rocks /root/.rocks
+WORKDIR /opt/tarantool
diff --git a/Makefile b/Makefile
index 778c58e..ff39f20 100644
--- a/Makefile
+++ b/Makefile
@@ -1,18 +1,21 @@
 .PHONY := all test

-run-compose:
-	make -C test run-compose
+run-etcd:
+	make -C test run-compose-etcd

-build-testing-image:
-	docker build -t config-test -f Dockerfile.test .
+config-test-builder:
+	docker build -t config-test-builder -f Dockerfile.build .

-test: build-testing-image run-compose
-	docker run --name config-test \
+config-test-%: config-test-builder run-etcd
+	docker build -t $(@) --build-arg IMAGE=$(subst config-test-,,$@) -f Dockerfile.test .
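# (editor's note, not part of the original patch) a sketch of how the pattern
# rules compose: `make test-2.11.1` first builds the config-test-2.11.1 image
# (tarantool/tarantool:2.11.1 plus the rocks prebuilt in Dockerfile.build),
# then the test-% target below runs ./run_test_in_docker.sh inside it.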
+ +test-%: config-test-% + docker run -it --name $(<) \ --net tt_net \ -e TT_ETCD_ENDPOINTS="http://etcd0:2379,http://etcd1:2379,http://etcd2:2379" \ --rm -v $$(pwd):/source/config \ -v $$(pwd)/data:/tmp/ \ --workdir /source/config \ --entrypoint '' \ - config-test \ + $(<) \ ./run_test_in_docker.sh diff --git a/config.lua b/config.lua index 8274153..85259ba 100644 --- a/config.lua +++ b/config.lua @@ -4,6 +4,7 @@ local json = require 'json'.new() local yaml = require 'yaml'.new() local digest = require 'digest' local fiber = require 'fiber' +local clock = require 'clock' json.cfg{ encode_invalid_as_nil = true } yaml.cfg{ encode_use_tostring = true } @@ -96,6 +97,19 @@ local function reflect_internals() end end break; + elseif vars.reload_cfg then + table.insert(steps,"reload_cfg") + peekf = vars.reload_cfg + elseif vars.reconfig_modules then + table.insert(steps,"reconfig_modules") + for k in pairs(peek) do + if peek[k] == true then + if vars[k] ~= nil then + peek[k] = vars[k] + end + end + end + peekf = vars.reconfig_modules else for k,v in pairs(vars) do log.info("var %s=%s",k,v) end error(string.format("Bad vars for %s after steps: %s", peekf, table.concat(steps, ", "))) @@ -775,7 +789,9 @@ local M -- subject to change, just a PoC local etcd_conf = args.etcd or cfg.etcd if etcd_conf then + local s = clock.time() cfg = etcd_load(M, etcd_conf, cfg) + log.info("etcd_load took %.3fs", clock.time()-s) end if args.load then diff --git a/config/etcd.lua b/config/etcd.lua index 1009032..f6d5ee1 100644 --- a/config/etcd.lua +++ b/config/etcd.lua @@ -1,6 +1,7 @@ local json = require 'json' local log = require 'log' local fiber = require 'fiber' +local clock = require 'clock' local http_client = require 'http.client' local digest = require 'digest' @@ -51,6 +52,7 @@ end setmetatable(M,{ __call = M.new }) function M:discovery() + local start_at = clock.time() local timeout = self.timeout or 1 local new_endpoints = {} local tried = {} @@ -83,14 +85,14 @@ function M:discovery() end end if #new_endpoints == 0 then - error("Failed to discover members "..table.concat(tried,", "),2) + error("Failed to discover members "..table.concat(tried,", ")..(" in %.3fs"):format(clock.time()-start_at),2) end if self.discover_endpoints then self.endpoints = new_endpoints table.insert(self.endpoints,table.remove(self.endpoints,1)) - log.info("discovered etcd endpoints "..table.concat(self.endpoints,", ")) + log.info("discovered etcd endpoints "..table.concat(self.endpoints,", ")..(" in %.3fs"):format(clock.time()-start_at)) else - log.info("hardcoded etcd endpoints "..table.concat(self.endpoints,", ")) + log.info("hardcoded etcd endpoints "..table.concat(self.endpoints,", ")..(" in %.3fs"):format(clock.time()-start_at)) end self.current = math.random(#self.endpoints) end diff --git a/run_test_in_docker.sh b/run_test_in_docker.sh index 30b170e..b9bff2e 100755 --- a/run_test_in_docker.sh +++ b/run_test_in_docker.sh @@ -2,4 +2,5 @@ pwd rm -rf /root/.cache/ -.rocks/bin/luatest --coverage -c -v spec/01_single_test.lua +cp -ar /root/.rocks /source/config/.rocks +/source/config/.rocks/bin/luatest --coverage -v spec/ diff --git a/spec/01_single_test.lua b/spec/01_single_test.lua index f82c152..52020c4 100644 --- a/spec/01_single_test.lua +++ b/spec/01_single_test.lua @@ -1,14 +1,7 @@ local t = require 'luatest' --[[@as luatest]] local uri = require 'uri' -local base_config = { - apps = { - single = { - common = { box = { log_level = 4 } }, - } - } -} - +---@class test.config.single:luatest.group local g = t.group('single', { 
{ instances = {single = '127.0.0.1:3301'}, run = {'single'} }, { @@ -27,41 +20,33 @@ local fio = require 'fio' local root = fio.dirname(this_file) local init_lua = fio.pathjoin(root, 'mock', 'single', 'init.lua') -local base_env = { - TT_WAL_DIR = nil, -- will be set at before_each trigger - TT_MEMTX_DIR = nil, -- will be set at before_each trigger - TT_ETCD_PREFIX = '/apps/single', - TT_CONFIG = fio.pathjoin(root, 'mock', 'single', 'conf.lua'), - TT_MASTER_SELECTION_POLICY = 'etcd.instance.single', - TT_ETCD_ENDPOINTS = os.getenv('TT_ETCD_ENDPOINTS') or "http://127.0.0.1:2379", -} +local base_env local h = require 'spec.helper' local test_ctx = {} -local working_dir - -g.before_each(function() - working_dir = h.create_workdir() +g.before_each(function(cg) + local working_dir = h.create_workdir() + base_env = { + TT_ETCD_PREFIX = '/apps/single', + TT_CONFIG = fio.pathjoin(root, 'mock', 'single', 'conf.lua'), + TT_MASTER_SELECTION_POLICY = 'etcd.instance.single', + TT_ETCD_ENDPOINTS = os.getenv('TT_ETCD_ENDPOINTS') or "http://127.0.0.1:2379", + } base_env.TT_WAL_DIR = working_dir base_env.TT_MEMTX_DIR = working_dir -end) - -g.after_each(function() - for _, info in pairs(test_ctx) do - for _, tt in pairs(info.tts) do - tt.tt:stop() - end - end + base_env.TT_WORK_DIR = working_dir - h.clean_directory(working_dir) + local base_config = { + apps = { + single = { + common = { box = { log_level = 1 } }, + } + } + } h.clear_etcd() -end) -function g.test_run_instances(cg) local params = cg.params - local this_ctx = { tts = {} } - test_ctx[cg.name] = this_ctx local etcd_config = table.deepcopy(base_config) etcd_config.apps.single.instances = {} @@ -69,39 +54,41 @@ function g.test_run_instances(cg) etcd_config.apps.single.instances[instance_name] = { box = { listen = listen_uri } } end + local ctx = { tts = {}, env = base_env, etcd_config = etcd_config, params = cg.params } + test_ctx[cg.name] = ctx + h.upload_to_etcd(etcd_config) +end) - for _, name in ipairs(params.run) do - local env = table.deepcopy(base_env) - env.TT_INSTANCE_NAME = name - local net_box_port = tonumber(uri.parse(etcd_config.apps.single.instances[name].box.listen).service) - - local tt = h.start_tarantool({ - alias = name, - env = env, - command = init_lua, - args = {}, - net_box_port = net_box_port, - }) - - table.insert(this_ctx.tts, { - tt = tt, - net_box_port = net_box_port, - env = env, - name = name, - }) +g.after_each(function() + for _, info in pairs(test_ctx) do + for _, tt in pairs(info.tts) do + tt.server:stop() + end + h.clean_directory(info.env.TT_WAL_DIR) + h.clean_directory(info.env.TT_MEMTX_DIR) + h.clean_directory(info.env.TT_WORK_DIR) end - for _, tt in ipairs(this_ctx.tts) do - tt.tt:connect_net_box() - local box_cfg = tt.tt:get_box_cfg() + h.clear_etcd() +end) + +function g.test_run_instances(cg) + local ctx = test_ctx[cg.name] + + -- Start tarantools + h.start_all_tarantools(ctx, init_lua, root, ctx.etcd_config.apps.single.instances) + + for _, tt in ipairs(ctx.tts) do + tt.server:connect_net_box() + local box_cfg = tt.server:get_box_cfg() t.assert_covers(box_cfg, { - log_level = etcd_config.apps.single.common.box.log_level, - listen = etcd_config.apps.single.instances[tt.name].box.listen, + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tt.name].box.listen, read_only = false, }, 'box.cfg is correct') - local conn = tt.tt --[[@as luatest.server]] + local conn = tt.server --[[@as luatest.server]] local ret = conn:exec(function() local r = 
table.deepcopy(config.get('sys')) for k, v in pairs(r) do @@ -119,14 +106,15 @@ function g.test_run_instances(cg) }, 'get("sys") has correct fields') end - for _, tt in ipairs(this_ctx.tts) do - local conn = tt.tt --[[@as luatest.server]] + -- restart tarantools + for _, tt in ipairs(ctx.tts) do + local conn = tt.server --[[@as luatest.server]] h.restart_tarantool(conn) - local box_cfg = tt.tt:get_box_cfg() + local box_cfg = tt.server:get_box_cfg() t.assert_covers(box_cfg, { - log_level = etcd_config.apps.single.common.box.log_level, - listen = etcd_config.apps.single.instances[tt.name].box.listen, + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tt.name].box.listen, read_only = false, }, 'box.cfg is correct after restart') diff --git a/spec/02_cluster_master_test.lua b/spec/02_cluster_master_test.lua new file mode 100644 index 0000000..93343f3 --- /dev/null +++ b/spec/02_cluster_master_test.lua @@ -0,0 +1,190 @@ +local t = require 'luatest' --[[@as luatest]] +local uuid = require 'uuid' + +---@class test.config.master:luatest.group +local g = t.group('master', { + { + cluster = 'single', + master = 'first_01', + instances = {first_01 = '127.0.0.1:3301', first_02 = '127.0.0.1:3302'}, + run = {'first_01', 'first_02'} + }, + { + cluster = 'single', + master = 'second_01', + instances = {second_01 = '127.0.0.1:3301', second_02 = '127.0.0.1:3302'}, + run = {'second_01'} + }, + { + cluster = 'single', + master = 'third_01', + instances = {third_01 = '127.0.0.1:3301', third_02 = '127.0.0.1:3302',third_03='127.0.0.1:3303'}, + run = {'third_03','third_02','third_01'} + }, +}) + +local this_file = debug.getinfo(1, "S").source:sub(2) +local fio = require 'fio' + +local root = fio.dirname(this_file) +local init_lua = fio.pathjoin(root, 'mock', 'single', 'init.lua') + +local base_env + +local h = require 'spec.helper' +local test_ctx = {} + +g.before_each(function(cg) + local working_dir = h.create_workdir() + base_env = { + TT_ETCD_PREFIX = '/apps/single', + TT_CONFIG = fio.pathjoin(root, 'mock', 'single', 'conf.lua'), + TT_MASTER_SELECTION_POLICY = 'etcd.cluster.master', + TT_ETCD_ENDPOINTS = os.getenv('TT_ETCD_ENDPOINTS') or "http://127.0.0.1:2379", + } + + base_env.TT_WAL_DIR = working_dir + base_env.TT_MEMTX_DIR = working_dir + + local base_config = { + etcd = { + fencing_enabled = true, + }, + apps = { + single = { + common = { box = { log_level = 1 } }, + clusters = { + single = { + master = cg.params.master, + replicaset_uuid = uuid.str(), + } + }, + } + }, + } + h.clear_etcd() + + local etcd_config = table.deepcopy(base_config) + etcd_config.apps.single.instances = {} + for instance_name, listen_uri in pairs(cg.params.instances) do + etcd_config.apps.single.instances[instance_name] = { + box = { listen = listen_uri }, + cluster = cg.params.cluster, + } + end + + local this_ctx = { tts = {}, env = base_env, etcd_config = etcd_config, params = cg.params } + test_ctx[cg.name] = this_ctx + + h.upload_to_etcd(etcd_config) +end) + +g.after_each(function() + for _, info in pairs(test_ctx) do + for _, tt in pairs(info.tts) do + tt.server:stop() + end + h.clean_directory(info.env.TT_WAL_DIR) + h.clean_directory(info.env.TT_MEMTX_DIR) + end + + h.clear_etcd() +end) + +function g.test_run_instances(cg) + local ctx = test_ctx[cg.name] + + -- Start tarantools + h.start_all_tarantools(ctx, init_lua, root, ctx.etcd_config.apps.single.instances) + + -- Check configuration + for _, tnt in ipairs(ctx.tts) do + tnt.server:connect_net_box() + local 
box_cfg = tnt.server:get_box_cfg() + t.assert_covers(box_cfg, { + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tnt.name].box.listen, + read_only = ctx.etcd_config.apps.single.clusters.single.master ~= tnt.name, + }, 'box.cfg is correct') + + local conn = tnt.server --[[@as luatest.server]] + local ret = conn:exec(function() + local r = table.deepcopy(config.get('sys')) + for k, v in pairs(r) do + if type(v) == 'function' then + r[k] = nil + end + end + return r + end) + + t.assert_covers(ret, { + instance_name = tnt.name, + master_selection_policy = 'etcd.cluster.master', + file = base_env.TT_CONFIG, + }, 'get("sys") has correct fields') + end + + -- restart+check configuration + for _, tt in ipairs(ctx.tts) do + h.restart_tarantool(tt.server) + + local box_cfg = tt.server:get_box_cfg() + t.assert_covers(box_cfg, { + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tt.name].box.listen, + read_only = ctx.etcd_config.apps.single.clusters.single.master ~= tt.name, + }, 'box.cfg is correct after restart') + + local ret = tt.server:exec(function() + local r = table.deepcopy(config.get('sys')) + for k, v in pairs(r) do + if type(v) == 'function' then + r[k] = nil + end + end + return r + end) + + t.assert_covers(ret, { + instance_name = tt.name, + master_selection_policy = 'etcd.cluster.master', + file = base_env.TT_CONFIG, + }, 'get("sys") has correct fields after restart') + end +end + +function g.test_reload(cg) + local ctx = test_ctx[cg.name] + + -- Start tarantools + h.start_all_tarantools(ctx, init_lua, root, ctx.etcd_config.apps.single.instances) + + -- reload+check configuration + for _, tt in ipairs(ctx.tts) do + h.reload_tarantool(tt.server) + + local box_cfg = tt.server:get_box_cfg() + t.assert_covers(box_cfg, { + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tt.name].box.listen, + read_only = ctx.etcd_config.apps.single.clusters.single.master ~= tt.name, + }, 'box.cfg is correct after restart') + + local ret = tt.server:exec(function() + local r = table.deepcopy(config.get('sys')) + for k, v in pairs(r) do + if type(v) == 'function' then + r[k] = nil + end + end + return r + end) + + t.assert_covers(ret, { + instance_name = tt.name, + master_selection_policy = 'etcd.cluster.master', + file = base_env.TT_CONFIG, + }, 'get("sys") has correct fields after restart') + end +end diff --git a/spec/helper.lua b/spec/helper.lua index a9c6743..c66bf92 100644 --- a/spec/helper.lua +++ b/spec/helper.lua @@ -2,11 +2,11 @@ local h = {} local t = require 'luatest' --[[@as luatest]] local fio = require 'fio' local log = require 'log' +local uri = require 'uri' local fun = require 'fun' local clock = require 'clock' local fiber = require 'fiber' -local http = require 'http.client' -local json = require 'json' +local json = require 'json' ---Creates temporary working directory ---@return string @@ -69,7 +69,8 @@ function h.clear_etcd() local etcd = h.get_etcd() local _, res = etcd:request('DELETE', 'keys/apps', { recursive = true, dir = true, force = true }) - assert(res.status >= 200 and res.status < 300, ("%s %s"):format(res.status, res.body)) + log.info("clear_etcd(%s) => %s:%s", '/apps', res.status, res.reason) + assert(res.status >= 200 and(res.status < 300 or res.status == 404), ("%s %s"):format(res.status, res.body)) end function h.upload_to_etcd(tree) @@ -80,58 +81,94 @@ function h.upload_to_etcd(tree) 
table.sort(keys) for _, key in ipairs(keys) do do - local _, res = etcd:request('PUT', 'keys'..key, { value = flat[key] }) - log.info(res) + local _, res = etcd:request('PUT', 'keys'..key, { value = flat[key], quorum = true }) assert(res.status < 300 and res.status >= 200, res.reason) end end local key = keys[1]:match('^(/[^/]+)') - log.info((etcd:list(key))) + log.info("list(%s): => %s", key, json.encode(etcd:list(key))) end ---Starts new tarantool server ---@param opts luatest.server.options ---@return luatest.server function h.start_tarantool(opts) - log.info(opts) + log.info("starting tarantool %s", json.encode(opts)) local srv = t.Server:new(opts) srv:start() local process = srv.process - local deadline = clock.time() + 15 + local deadline = clock.time() + 30 while clock.time() < deadline do - fiber.sleep(3) - assert(process:is_alive(), "tarantool is dead") + fiber.sleep(0.1) + if process:is_alive() then break end + end + return srv +end - if pcall(function() srv:connect_net_box() end) then - break - end +function h.start_all_tarantools(ctx, init_lua, root, instances) + for _, name in ipairs(ctx.params.run) do + local env = table.deepcopy(ctx.env) + env.TT_INSTANCE_NAME = name + local net_box_port = tonumber(uri.parse(instances[name].box.listen).service) + + local tt = h.start_tarantool({ + alias = name, + env = env, + command = init_lua, + args = {}, + net_box_port = net_box_port, + workdir = root, + }) + + table.insert(ctx.tts, { + server = tt, + net_box_port = net_box_port, + env = env, + name = name, + }) end - return srv + for _, tt in ipairs(ctx.tts) do + h.wait_tarantool(tt.server) + end end ----comment ----@param conn luatest.server -function h.restart_tarantool(conn) - conn:stop() +---@param srv luatest.server +function h.wait_tarantool(srv) + t.helpers.retrying({ timeout = 30, delay = 0.1 }, function () + srv:connect_net_box() + srv:call('box.info') + end) +end + +---@param server luatest.server +function h.restart_tarantool(server) + server:stop() local deadline = clock.time() + 15 fiber.sleep(3) - conn:start() + server:start() while clock.time() < deadline do fiber.sleep(3) - assert(conn.process:is_alive(), "tarantool is dead") + assert(server.process:is_alive(), "tarantool is dead") - if pcall(function() conn:connect_net_box() end) then + if pcall(function() server:connect_net_box() end) then break end end end +---@param server luatest.server +function h.reload_tarantool(server) + server:exec(function() + package.reload() + end) +end + return h diff --git a/spec/mock/single/init.lua b/spec/mock/single/init.lua index 172fa2f..379c061 100644 --- a/spec/mock/single/init.lua +++ b/spec/mock/single/init.lua @@ -1,9 +1,9 @@ #!/usr/bin/env tarantool - +require 'package.reload' require 'config' { mkdir = true, print_config = true, - instance_name = os.getenv('TT_INSTANCE_NAME') or 'single', + instance_name = os.getenv('TT_INSTANCE_NAME'), file = os.getenv('TT_CONFIG'), master_selection_policy = os.getenv('TT_MASTER_SELECTION_POLICY'), on_after_cfg = function() diff --git a/test/Dockerfile b/test/Dockerfile index d88f4e9..a3e071c 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -1,6 +1,7 @@ -FROM tarantool/tarantool:1.10.14 +FROM tarantool/tarantool:2.11 RUN apk add --no-cache -u iproute2 make bind-tools WORKDIR /opt/tarantool +RUN tarantoolctl rocks --global --server http://moonlibs.github.io/rocks install package-reload scm-1 CMD ["tarantool" "/opt/tarantool/init.lua"] diff --git a/test/Makefile b/test/Makefile index 4fff538..437a918 100644 --- a/test/Makefile +++ 
b/test/Makefile @@ -1,3 +1,6 @@ -run-compose: +run-compose-etcd: + docker compose up -d --remove-orphans --build etcd0 etcd1 etcd2 + +run-compose: run-compose-etcd docker compose up -d --remove-orphans --build diff --git a/test/app/init.lua b/test/app/init.lua index 295817d..7059c9b 100644 --- a/test/app/init.lua +++ b/test/app/init.lua @@ -1,5 +1,6 @@ local fiber = require "fiber" +require 'package.reload' require 'config' { mkdir = true, print_config = true, diff --git a/test/docker-compose.yml b/test/docker-compose.yml index b37455a..1cb72cc 100644 --- a/test/docker-compose.yml +++ b/test/docker-compose.yml @@ -13,23 +13,23 @@ x-etcd: &etcd networks: - tarantool -x-tt: &tt - build: . - volumes: - - $PWD/../:/opt/tarantool/.rocks/share/tarantool:ro - - $PWD/app:/opt/tarantool - - $PWD/net:/opt/tarantool/net:ro - depends_on: - etcd0: - condition: service_started - etcd1: - condition: service_started - etcd2: - condition: service_started - privileged: true - networks: - - tarantool - command: ["/bin/sh", "-c", "sleep 5 && tarantool /opt/tarantool/init.lua"] +# x-tt: &tt +# build: . +# volumes: +# - $PWD/../:/opt/tarantool/.rocks/share/tarantool:ro +# - $PWD/app:/opt/tarantool +# - $PWD/net:/opt/tarantool/net:ro +# depends_on: +# etcd0: +# condition: service_started +# etcd1: +# condition: service_started +# etcd2: +# condition: service_started +# privileged: true +# networks: +# - tarantool +# command: ["/bin/sh", "-c", "sleep 5 && tarantool /opt/tarantool/init.lua"] networks: tarantool: @@ -62,28 +62,28 @@ services: ETCD_ADVERTISE_CLIENT_URLS: http://etcd2:2379 ETCD_INITIAL_ADVERTISE_PEER_URLS: http://etcd2:2380 - etcd_load: - image: registry.gitlab.com/ochaton/switchover:010a6965 - networks: - - tarantool - volumes: - - $PWD/instance.etcd.yaml:/instance.etcd.yaml:ro - depends_on: - etcd0: - condition: service_started - etcd1: - condition: service_started - etcd2: - condition: service_started - entrypoint: [''] - command: ["/bin/sh", "-c", "sleep 3 && switchover -v -e http://etcd0:2379,http://etcd1:2379,http://etcd2:2379 etcd load / /instance.etcd.yaml"] - instance_01: - <<: *tt - container_name: instance_01 - environment: - TT_INSTANCE_NAME: instance_01 - instance_02: - <<: *tt - container_name: instance_02 - environment: - TT_INSTANCE_NAME: instance_02 + # etcd_load: + # image: registry.gitlab.com/ochaton/switchover:2.7.0.20 + # networks: + # - tarantool + # volumes: + # - $PWD/instance.etcd.yaml:/instance.etcd.yaml:ro + # depends_on: + # etcd0: + # condition: service_started + # etcd1: + # condition: service_started + # etcd2: + # condition: service_started + # entrypoint: [''] + # command: ["/bin/sh", "-c", "sleep 3 && switchover -v -e http://etcd0:2379,http://etcd1:2379,http://etcd2:2379 etcd load / /instance.etcd.yaml"] + # instance_01: + # <<: *tt + # container_name: instance_01 + # environment: + # TT_INSTANCE_NAME: instance_01 + # instance_02: + # <<: *tt + # container_name: instance_02 + # environment: + # TT_INSTANCE_NAME: instance_02 From 0ade1c527580161b1b81db4adfabba8227628eb9 Mon Sep 17 00:00:00 2001 From: Vladislav Grubov Date: Sun, 10 Sep 2023 00:37:17 +0300 Subject: [PATCH 3/6] ci: fix not TTY --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ff39f20..f4de20f 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ config-test-%: config-test-builder run-etcd docker build -t $(@) --build-arg IMAGE=$(subst config-test-,,$@) -f Dockerfile.test . 
 test-%: config-test-%
-	docker run -it --name $(<) \
+	docker run --name $(<) \
 	--net tt_net \
 	-e TT_ETCD_ENDPOINTS="http://etcd0:2379,http://etcd1:2379,http://etcd2:2379" \
 	--rm -v $$(pwd):/source/config \
 	-v $$(pwd)/data:/tmp/ \
 	--workdir /source/config \
 	--entrypoint '' \
 	$(<) \
 	./run_test_in_docker.sh

From d047a6ad019d8fe54352df9bef39c96a1e5c552e Mon Sep 17 00:00:00 2001
From: Vladislav Grubov
Date: Wed, 13 Sep 2023 09:43:16 +0300
Subject: [PATCH 4/6] support 2.11 bootstrap_strategy and derive default_cfg

* feat: support bootstrap_strategy (2.11)
* feat: derive load_cfg/default_cfg
* fix: ensure the options table is always cleared of non-box.cfg keys before
  it is passed to box.cfg (or wrap_box_cfg, or boxcfg)
* feat: add annotations for config and config.etcd
* feat: use module-aware logger (2.11 feature)
* feat: expose load_cfg/{default_cfg,...} as config._load_cfg
---
 .github/workflows/test.yml      |  35 +++++-
 .luacov                         |  19 ++++
 config.lua                      | 193 ++++++++++++++++++++++++--------
 config/etcd.lua                 |  69 ++++++++++++
 run_test_in_docker.sh           |   2 +-
 spec/02_cluster_master_test.lua | 117 ++++++++++++++++++-
 test/Dockerfile                 |   2 +-
 test/app/conf.lua               |  17 +--
 test/app/init.lua               |   7 +-
 test/instance.etcd.yaml         |  10 +-
 10 files changed, 394 insertions(+), 77 deletions(-)
 create mode 100644 .luacov

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 792c8cc..aded7c7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,4 +1,4 @@
-name: Create and push rockspec for moonlibs/config
+name: Run unit tests

 on:
   push:
@@ -7,13 +7,40 @@ env:
   ROCK_NAME: config

 jobs:
-  test-matrix:
+  run-luacheck:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@master
+      - uses: tarantool/setup-tarantool@v2
+        with:
+          tarantool-version: '2.10.7'
+      - name: install luacheck 0.26.0
+        run: tarantoolctl rocks install luacheck 0.26.0
+      - name: run luacheck
+        run: .rocks/bin/luacheck .
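      # (editor's note, not part of the original patch) run-luacheck only lints
      # the sources on a pinned tarantool; the run-unit-tests job below fans the
      # dockerized suite out across the tarantool versions in the matrix, reusing
      # the test-% make target from the earlier patches.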
+ run-unit-tests: runs-on: ubuntu-latest strategy: matrix: - version: ["1.10.15", "2.8.4", "2.10.6", "2.11.0", "2.11.1"] + version: ["1.10.15", "2.8.4", "2.10.6", "2.10.7-gc64-amd64", "2.11.0", "2.11.1"] steps: - uses: actions/checkout@master - uses: docker/setup-buildx-action@v2 - name: run test suite for ${{matrix.version}} - run: make test-${{matrix.version}} \ No newline at end of file + run: make test-${{matrix.version}} + run-coverage-report: + runs-on: ubuntu-latest + steps: + - uses: tarantool/setup-tarantool@v2 + with: + tarantool-version: '2.10.7' + - name: install luacov-console 1.1.0 + run: tarantoolctl rocks install luacov-console 1.1.0 + - name: install luacov-coveralls 0.2.3 + run: tarantoolctl rocks install --server=http://luarocks.org luacov-coveralls 0.2.3 + - name: prepare coverage report + run: .rocks/bin/luacov-console "$(pwd)" && .rocks/bin/luacov-console -s + # - name: publish coveralls report + # env: + # COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} + # run: .rocks/bin/luacov-coveralls -v diff --git a/.luacov b/.luacov new file mode 100644 index 0000000..c03ef6c --- /dev/null +++ b/.luacov @@ -0,0 +1,19 @@ +runreport = false +deletestats = false + +exclude = { + "spec/", + "test/", + "test_peek", + "%.rocks/", + "builtin/", +} + +coveralls = { + root = "/", + debug = true, + pathcorrect = { + { "^/home/runner/work/config/config/", "" }, + { "^/source/config", "" }, + }, +} \ No newline at end of file diff --git a/config.lua b/config.lua index 85259ba..d6a9862 100644 --- a/config.lua +++ b/config.lua @@ -1,4 +1,8 @@ +---@diagnostic disable: inject-field local log = require 'log' +if log.new then + log = log.new('moonlibs.config') +end local fio = require 'fio' local json = require 'json'.new() local yaml = require 'yaml'.new() @@ -8,6 +12,9 @@ local clock = require 'clock' json.cfg{ encode_invalid_as_nil = true } yaml.cfg{ encode_use_tostring = true } +---Retrieves all upvalues of given function and returns them as kv-map +---@param fun fun() +---@return table variables local function lookaround(fun) local vars = {} local i = 1 @@ -17,14 +24,26 @@ local function lookaround(fun) vars[n] = v i = i + 1 end - i = 1 - return vars, i - 1 + return vars end +---@private +---@class moonlibs.config.reflect_internals +---@field dynamic_cfg table +---@field default_cfg table +---@field upgrade_cfg? fun(cfg: table, translate_cfg: table): table +---@field template_cfg? table +---@field translate_cfg? table +---@field log? 
table + + +---Unwraps box.cfg and retrieves dynamic_cfg, default_cfg tables +---@return moonlibs.config.reflect_internals local function reflect_internals() local peek = { dynamic_cfg = {}; + default_cfg = {}; upgrade_cfg = true; translate_cfg = true; template_cfg = true; @@ -47,7 +66,12 @@ local function reflect_internals() error(string.format("Neither function nor callable argument %s after steps: %s", peekf, table.concat(steps, ", "))) end - local vars, _ = lookaround(peekf) + local vars = lookaround(peekf) + if type(vars.default_cfg) == 'table' then + for k in pairs(vars.default_cfg) do + peek.default_cfg[k] = vars.default_cfg[k] + end + end if allow_unwrap and (vars.orig_cfg or vars.origin_cfg) then -- It's a wrap of tarantoolctl/tt, unwrap and repeat peekf = (vars.orig_cfg or vars.origin_cfg) @@ -124,7 +148,11 @@ end local load_cfg = reflect_internals() --- TODO: suppress deprecation +---Filters only valid keys from given cfg +--- +---Edits given cfg and returns only clear config +---@param cfg table +---@return table local function prepare_box_cfg(cfg) -- 1. take config, if have upgrade, upgrade it if load_cfg.upgrade_cfg then @@ -311,6 +339,7 @@ local function toboolean(v) return false end +---@type table local master_selection_policies; master_selection_policies = { ['etcd.instance.single'] = function(M, instance_name, common_cfg, instance_cfg, cluster_cfg, local_cfg) @@ -373,8 +402,10 @@ master_selection_policies = { if cluster_cfg.master == instance_name then log.info("Instance is declared as cluster master, set read_only=false") cfg.box.read_only = false - cfg.box.replication_connect_quorum = 1 - cfg.box.replication_connect_timeout = 1 + if cfg.box.bootstrap_strategy ~= 'auto' then + cfg.box.replication_connect_quorum = 1 + cfg.box.replication_connect_timeout = 1 + end else log.info("Cluster has another master %s, not me %s, set read_only=true", cluster_cfg.master, instance_name) cfg.box.read_only = true @@ -472,6 +503,17 @@ local function gen_cluster_uuid(cluster_name) error("Can't generate uuid for cluster "..cluster_name, 2) end +---@class moonlibs.config.opts.etcd:moonlibs.config.etcd.opts +---@field instance_name string Mandatory name of the instance +---@field prefix string Mandatory prefix inside etcd tree +---@field uuid? 'auto' When auto config generates replicaset_uuid and instance_uuid for nodes +---@field fixed? table Optional ETCD tree + +---Loads configuration from etcd and evaluate master_selection_policy +---@param M moonlibs.config +---@param etcd_conf moonlibs.config.opts.etcd +---@param local_cfg table +---@return table local function etcd_load( M, etcd_conf, local_cfg ) local etcd @@ -552,19 +594,13 @@ local function etcd_load( M, etcd_conf, local_cfg ) print("Loaded config from etcd",yaml.encode(all_cfg)) end local common_cfg = all_cfg.common - -- local common_cfg = etcd:get_common() local all_instances_cfg = all_cfg.instances - -- local all_instances_cfg = etcd:get_instances() local instance_cfg = all_instances_cfg[instance_name] assert(instance_cfg,"Instance name "..instance_name.." 
is not known to etcd") - -- local all_clusters_cfg = etcd:get_clusters() local all_clusters_cfg = all_cfg.clusters or all_cfg.shards - -- print(yaml.encode(all_clusters_cfg)) - - local master_selection_policy local cluster_cfg if instance_cfg.cluster or local_cfg.cluster then @@ -628,13 +664,14 @@ local function etcd_load( M, etcd_conf, local_cfg ) "Start instance "..cfg.box.listen, " with replication:"..table.concat(cfg.box.replication,", "), string.format("timeout: %s, quorum: %s, lag: %s", - cfg.box.replication_connect_timeout or 'def:30', + cfg.box.replication_connect_timeout + or ('def:%s'):format(load_cfg.default_cfg.replication_connect_quorum or 30), cfg.box.replication_connect_quorum or 'def:full', - cfg.box.replication_sync_lag or 'def:10' + cfg.box.replication_sync_lag + or ('def:%s'):format(load_cfg.default_cfg.replication_sync_lag or 10) ) ) end - --end end -- print(yaml.encode(cfg)) @@ -675,9 +712,57 @@ local function optimal_rcq(upstreams) return rcq end +local function do_cfg(boxcfg, cfg) + for key, val in pairs(cfg) do + if load_cfg.default_cfg[key] == nil and load_cfg.dynamic_cfg[key] == nil then + local warn = string.format("Dropping non-boxcfg option '%s' given '%s'",key,val) + log.warn("%s",warn) + print(warn) + cfg[key] = nil + end + end + log.info("Just before box.cfg %s", yaml.encode(cfg)) + boxcfg(cfg) +end + + +---@class moonlibs.config.opts +---@field bypass_non_dynamic? boolean (default: true) drops every changed non-dynamic option on reconfiguration +---@field tidy_load? boolean (default: true) recoveries tarantool with read_only=true +---@field mkdir? boolean (default: false) should moonlibs/config create memtx_dir and wal_dir +---@field etcd? moonlibs.config.opts.etcd [legacy] configuration of etcd +---@field default_replication_connect_timeout? number (default: 1.1) default RCT in seconds +---@field default_election_mode? election_mode (default: candidate) option is respected only when etcd.cluster.raft is used +---@field default_synchro_quorum? string|number (default: 'N/2+1') option is respected only when etcd.cluster.raft is used +---@field default_read_only? boolean (default: false) option is respected only when etcd.instance.read_only is used (deprecated) +---@field master_selection_policy? 'etcd.cluster.master'|'etcd.cluster.vshard'|'etcd.cluster.raft'|'etcd.instance.single' master selection policy +---@field strict_mode? boolean (default: false) stricts config retrievals. if key is not found config.get will raise an exception +---@field strict? boolean (default: false) stricts config retrievals. if key is not found config.get will raise an exception +---@field default? table (default: nil) globally default options for config.get +---@field on_load? fun(conf: moonlibs.config, cfg: table) callback which is called every time config is loaded from file and ETCD +---@field load? fun(conf: moonlibs.config, cfg: table): table do not use this callback +---@field on_before_cfg? fun(conf: moonlibs.config, cfg: table) callback is called right before running box.cfg (but after on_load) +---@field boxcfg? fun(cfg: table) [legacy] when provided this function will be called instead box.cfg. tidy_load and everything else will not be used. +---@field wrap_box_cfg? fun(cfg: table) callback is called instead box.cfg. But tidy_load is respected. Use this, if you need to proxy every option to box.cfg on application side +---@field on_after_cfg? 
fun(conf: moonlibs.config, cfg: table) callback which is called after full tarantool configuration + +---@class moonlibs.config: moonlibs.config.opts +---@field etcd moonlibs.config.etcd +---@field public _load_cfg table +---@field public _flat table +---@field public _fencing_f? Fiber +---@operator call(moonlibs.config.opts): moonlibs.config + +---@type moonlibs.config local M M = setmetatable({ console = {}; + ---Retrieves value from config + ---@overload fun(k: string, def: any?): any? + ---@param self moonlibs.config + ---@param k string path inside config + ---@param def? any optional default value + ---@return any? get = function(self,k,def) if self ~= M then def = k @@ -696,6 +781,9 @@ local M end end },{ + ---Reinitiates moonlibs.config + ---@param args moonlibs.config.opts + ---@return moonlibs.config __call = function(_, args) -- args MUST belong to us, because of modification local file @@ -721,6 +809,7 @@ local M M.master_selection_policy = args.master_selection_policy M.default = args.default M.strict_mode = args.strict_mode or args.strict or false + M._load_cfg = load_cfg -- print("config", "loading ",file, json.encode(args)) if not file then file = get_opt() @@ -802,6 +891,10 @@ local M error("No box.* config given", 2) end + if cfg.box.remote_addr then + cfg.box.remote_addr = nil + end + if args.bypass_non_dynamic then cfg.box = prepare_box_cfg(cfg.box) end @@ -812,10 +905,6 @@ local M cfg.sys.boxcfg = nil cfg.sys.on_load = nil - -- if not cfg.box.custom_proc_title and args.instance_name then - -- cfg.box.custom_proc_title = args.instance_name - -- end - -- latest modifications and fixups if args.on_load then args.on_load(M,cfg) @@ -823,7 +912,7 @@ local M return cfg end - local cfg = load_config() + local cfg = load_config() --[[@as table]] M._flat = flatten(cfg) @@ -852,16 +941,10 @@ local M end end - if cfg.box.remote_addr then - cfg.box.remote_addr = nil - end - - - -- print(string.format("Starting app: %s", yaml.encode(cfg.box))) - local boxcfg + local boxcfg = box.cfg if args.boxcfg then - args.boxcfg( cfg.box ) + do_cfg(args.boxcfg, cfg.box) else if args.wrap_box_cfg then boxcfg = args.wrap_box_cfg @@ -877,7 +960,7 @@ local M snap_dir = "." 
end end - local bootstrapped = false + local bootstrapped for _,v in pairs(fio.glob(snap_dir..'/*.snap')) do bootstrapped = v end @@ -886,13 +969,15 @@ local M print("Have etcd, use tidy load") local ro = cfg.box.read_only cfg.box.read_only = true - if not ro then - -- Only if node should be master - cfg.box.replication_connect_quorum = 1 - cfg.box.replication_connect_timeout = M.default_replication_connect_timeout - elseif not cfg.box.replication_connect_quorum then - -- For replica tune up to N/2+1 - cfg.box.replication_connect_quorum = optimal_rcq(cfg.box.replication) + if cfg.box.bootstrap_strategy ~= 'auto' then + if not ro then + -- Only if node should be master + cfg.box.replication_connect_quorum = 1 + cfg.box.replication_connect_timeout = M.default_replication_connect_timeout + elseif not cfg.box.replication_connect_quorum then + -- For replica tune up to N/2+1 + cfg.box.replication_connect_quorum = optimal_rcq(cfg.box.replication) + end end log.info("Start tidy loading with ro=true%s rcq=%s rct=%s (snap=%s)", ro ~= true and string.format(' (would be %s)',ro) or '', @@ -900,17 +985,31 @@ local M bootstrapped ) else - if not cfg.box.replication_connect_quorum then - cfg.box.replication_connect_quorum = optimal_rcq(cfg.box.replication) + -- not bootstraped yet cluster + + -- if cfg.box.bootstrap_strategy == 'auto' then -- ≥ Tarantool 2.11 + -- local ro = cfg.box.read_only + -- local is_candidate = cfg.box.election_mode == 'candidate' + -- if not ro and not is_candidate then + -- -- master but not Raft/candidate + -- -- we decrease replication for master, + -- -- to allow him bootstrap himself + -- cfg.box.replication = {cfg.box.remote_addr or cfg.box.listen} + -- end + if cfg.box.bootstrap_strategy ~= 'auto' then -- < Tarantool 2.11 + if cfg.box.replication_connect_quorum == nil then + cfg.box.replication_connect_quorum = optimal_rcq(cfg.box.replication) + end end + log.info("Start non-bootstrapped tidy loading with ro=%s rcq=%s rct=%s (dir=%s)", - cfg.box.read_only, cfg.box.replication_connect_quorum, cfg.box.replication_connect_timeout, snap_dir) + cfg.box.read_only, cfg.box.replication_connect_quorum, + cfg.box.replication_connect_timeout, snap_dir + ) end end - log.info("Just before box.cfg %s", yaml.encode( cfg.box )) - - ;(boxcfg or box.cfg)( cfg.box ) + do_cfg(boxcfg, cfg.box) log.info("Reloading config after start") @@ -932,14 +1031,14 @@ local M if diff_box then log.info("Reconfigure after load with %s",require'json'.encode(diff_box)) - ;(boxcfg or box.cfg)(diff_box) + do_cfg(boxcfg, diff_box) else log.info("Config is actual after load") end M._flat = flatten(new_cfg) else - (boxcfg or box.cfg)( cfg.box ) + do_cfg(boxcfg, cfg.box) end else local replication = cfg.box.replication_source or cfg.box.replication @@ -951,12 +1050,12 @@ local M cfg.box.replication = nil cfg.box.replication_source = nil - (boxcfg or box.cfg)( cfg.box ) + do_cfg(boxcfg, cfg.box) cfg.box.replication = r cfg.box.replication_source = rs else - (boxcfg or box.cfg)( cfg.box ) + do_cfg(boxcfg, cfg.box) end end end @@ -969,7 +1068,7 @@ local M local msp = config.get('sys.master_selection_policy') if type(cfg.etcd) == 'table' and config.get('etcd.fencing_enabled') - and msp == 'etcd.cluster.master' + and (msp == 'etcd.cluster.master' or msp == 'etcd.cluster.vshard') and type(cfg.cluster) == 'string' and cfg.cluster ~= '' and config.get('etcd.reduce_listing_quorum') ~= true then diff --git a/config/etcd.lua b/config/etcd.lua index f6d5ee1..6eac97d 100644 --- a/config/etcd.lua +++ b/config/etcd.lua @@ -1,11 
+1,34 @@ local json = require 'json' local log = require 'log' +if log.new then + log = log.new('moonlibs.config') +end local fiber = require 'fiber' local clock = require 'clock' local http_client = require 'http.client' local digest = require 'digest' +---@class moonlibs.config.etcd.opts +---@field endpoints? string[] (default: {'http://127.0.0.1:4001','http://127.0.0.1:2379'}) list of clientURLs to etcd +---@field timeout? number (default: 1) timeout of request to each node to etcd +---@field boolean_auto? boolean (default: false) when true each string value `true`, `false` is converted to boolean value +---@field print_config? boolean (default: false) when true loaded configuration from etcd is printed out +---@field discover_endpoints? boolean (default: true) when false connector does not automatically discovers etcd endpoints +---@field reduce_listing_quorum? boolean (default: false) when true connector does not request etcd:list with quorum +---@field login? string allows to specify username for each request (Basic-auth) +---@field password? string allows to specify password for each request (Basic-auth) + +---@class moonlibs.config.etcd +---@field endpoints string[] (default: {'http://127.0.0.1:4001','http://127.0.0.1:2379'}) list of clientURLs to etcd +---@field client http +---@field timeout number (default: 1) timeout of request to each node to etcd +---@field boolean_auto? boolean (default: false) when true each string value `true`, `false` is converted to boolean value +---@field print_config? boolean (default: false) when true loaded configuration from etcd is printed out +---@field discover_endpoints boolean (default: true) when false connector does not automatically discovers etcd endpoints +---@field reduce_listing_quorum? boolean (default: false) when true connector does not request etcd:list with quorum +---@field authorization? string Authorization header for Basic-auth (is set only when login is present) +---@field headers? table headers which are provided on each request local M = {} M.err = {} @@ -30,6 +53,10 @@ function M.errstr(code) return M.err[ tonumber(code) ] or string.format("Unknown error %s",code) end +---Creates new etcd connector +---@param mod moonlibs.config.etcd +---@param options moonlibs.config.etcd.opts +---@return moonlibs.config.etcd function M.new(mod,options) local self = setmetatable({},{__index=mod}) self.endpoints = options.endpoints or {'http://127.0.0.1:4001','http://127.0.0.1:2379'} @@ -51,6 +78,8 @@ end setmetatable(M,{ __call = M.new }) +---Discovers every ETCD endpoint by requesting clientURLs (/v2/members) +--- ?: make it parallel function M:discovery() local start_at = clock.time() local timeout = self.timeout or 1 @@ -97,6 +126,16 @@ function M:discovery() self.current = math.random(#self.endpoints) end +---@class moonlibs.etcd.request.opts +---@field deadline? number deadline of request (in seconds, fractional) +---@field timeout? number timeout of request to each node (in seconds, fractional) +---@field body? string request body (for PUT) + +---Performs etcd request +---@param method 'PUT'|'GET'|'DELETE'|'HEAD' http_method +---@param path string etcd path after /v2/ +---@param args? 
moonlibs.etcd.request.opts +---@return table, HTTPResponse function M:request(method, path, args ) -- path must be prefixed outside -- TODO: auth @@ -108,6 +147,8 @@ function M:request(method, path, args ) table.insert(query, '=') table.insert(query, tostring(v)) end + else + args = {} end local qs if #query > 0 then qs = '?'..table.concat(query) else qs = '' end @@ -127,9 +168,20 @@ function M:request(method, path, args ) if deadline then request_timeout = math.min(deadline-fiber.time(), request_timeout) end + local s = clock.time() local x = self.client.request(method,uri,body,{timeout = request_timeout; headers = self.headers}) lastresponse = x local status,reply = pcall(json.decode,x and x.body) + local logger = log.verbose + if x.status >= 500 then + logger = log.error + end + logger("%s %s (to:%.3fs) finished with %s%s %s (in %.3fs)", + method, uri, request_timeout, x.status, + status and reply and reply.errorCode and (reply.message or M.err[reply.errorCode] or reply.errorCode), + (x.headers or {})['X-Etcd-Index'], + clock.time()-s + ) -- 408 for timeout if x.status < 500 and x.status ~= 408 then @@ -193,6 +245,15 @@ function M:recursive_extract(cut, node, storage) if not storage then return _storage[''] end end +---@class moonlibs.config.etcd.list.opts:moonlibs.etcd.request.opts +---@field recursive? boolean (default: true) should listing be recursive +---@field quorum? boolean (default: not reduce_listing_quorum) when true requests quorum read + +---Performs listing by given path +---@param keyspath string path inside etcd +---@param opts moonlibs.config.etcd.list.opts +---@return unknown +---@return HTTPResponse function M:list(keyspath, opts) if type(opts) ~= 'table' then opts = {} @@ -218,6 +279,14 @@ function M:list(keyspath, opts) end end +---@class moonlibs.config.etcd.wait.opts +---@field timeout? 
number (default: etcd.timeout) timeout for each node to await changes +---@field index number etcd-index that should be awaited + +---Awaits any change in subtree recursively +---@param keyspath string +---@param args moonlibs.config.etcd.wait.opts +---@return boolean not_timed_out, HTTPResponse function M:wait(keyspath, args) args = args or {} local _, response = self:request("GET","keys"..keyspath, { diff --git a/run_test_in_docker.sh b/run_test_in_docker.sh index b9bff2e..d8bd305 100755 --- a/run_test_in_docker.sh +++ b/run_test_in_docker.sh @@ -2,5 +2,5 @@ pwd rm -rf /root/.cache/ -cp -ar /root/.rocks /source/config/.rocks +cp -ar /root/.rocks /source/config/ /source/config/.rocks/bin/luatest --coverage -v spec/ diff --git a/spec/02_cluster_master_test.lua b/spec/02_cluster_master_test.lua index 93343f3..db3877c 100644 --- a/spec/02_cluster_master_test.lua +++ b/spec/02_cluster_master_test.lua @@ -1,5 +1,6 @@ local t = require 'luatest' --[[@as luatest]] local uuid = require 'uuid' +local fiber = require 'fiber' ---@class test.config.master:luatest.group local g = t.group('master', { @@ -32,6 +33,20 @@ local init_lua = fio.pathjoin(root, 'mock', 'single', 'init.lua') local base_env local h = require 'spec.helper' + +---@class moonlibs.config.test.tarantool +---@field server luatest.server +---@field net_box_port number +---@field env table +---@field name string + +---@class moonlibs.config.test.context +---@field tts moonlibs.config.test.tarantool[] +---@field env table +---@field etcd_config table +---@field params table + +---@type table local test_ctx = {} g.before_each(function(cg) @@ -47,12 +62,12 @@ g.before_each(function(cg) base_env.TT_MEMTX_DIR = working_dir local base_config = { - etcd = { - fencing_enabled = true, - }, apps = { single = { - common = { box = { log_level = 1 } }, + common = { + etcd = { fencing_enabled = true }, + box = { log_level = 5 }, + }, clusters = { single = { master = cg.params.master, @@ -188,3 +203,97 @@ function g.test_reload(cg) }, 'get("sys") has correct fields after restart') end end + +function g.test_fencing(cg) + local ctx = test_ctx[cg.name] + t.skip_if(not ctx.etcd_config.apps.single.common.etcd.fencing_enabled, "fencing disabled") + + -- Start tarantools + h.start_all_tarantools(ctx, init_lua, root, ctx.etcd_config.apps.single.instances) + + -- Check configuration + for _, tnt in ipairs(ctx.tts) do + tnt.server:connect_net_box() + local box_cfg = tnt.server:get_box_cfg() + t.assert_covers(box_cfg, { + log_level = ctx.etcd_config.apps.single.common.box.log_level, + listen = ctx.etcd_config.apps.single.instances[tnt.name].box.listen, + read_only = ctx.etcd_config.apps.single.clusters.single.master ~= tnt.name, + }, 'box.cfg is correct') + + local conn = tnt.server --[[@as luatest.server]] + local ret = conn:exec(function() + local r = table.deepcopy(config.get('sys')) + for k, v in pairs(r) do + if type(v) == 'function' then + r[k] = nil + end + end + return r + end) + + t.assert_covers(ret, { + instance_name = tnt.name, + master_selection_policy = 'etcd.cluster.master', + file = base_env.TT_CONFIG, + }, 'get("sys") has correct fields') + end + + local master_name = ctx.params.master + + ---@type moonlibs.config.test.tarantool + local master + for _, tt in ipairs(ctx.tts) do + if tt.name == master_name then + master = tt + break + end + end + + t.assert(master, 'master is not connected') + + local ret = master.server:exec(function() + return { cfg_ro = box.cfg.read_only, ro = box.info.ro } + end) + + t.assert_equals(ret.cfg_ro, false, 
+	t.assert_equals(ret.ro, false, 'box.info.ro == false (before fencing)')
+
+	ctx.etcd_config.apps.single.clusters.single.master = 'not_exists'
+	h.upload_to_etcd(ctx.etcd_config)
+
+	local fencing_cfg = ctx.etcd_config.apps.single.common.etcd
+	local fencing_timeout = fencing_cfg.fencing_timeout or 10
+	local fencing_pause = fencing_cfg.fencing_pause or fencing_timeout/2
+
+	t.helpers.retrying({
+		timeout = fencing_pause,
+		delay = 0.1,
+	}, function ()
+		local ret = master.server:exec(function()
+			return { cfg_ro = box.cfg.read_only, ro = box.info.ro }
+		end)
+		assert(ret.cfg_ro, "cfg.read_only must be true")
+		assert(ret.ro, "info.ro must be true")
+	end)
+
+	local ret = master.server:exec(function()
+		return { cfg_ro = box.cfg.read_only, ro = box.info.ro }
+	end)
+
+	t.assert_equals(ret.cfg_ro, true, 'box.cfg.read_only == true')
+	t.assert_equals(ret.ro, true, 'box.info.ro == true')
+
+	ctx.etcd_config.apps.single.clusters.single.master = master_name
+	h.upload_to_etcd(ctx.etcd_config)
+
+	local deadline = 2*fencing_timeout+fiber.time()
+	while fiber.time() < deadline do
+		local ret2 = master.server:exec(function()
+			return { cfg_ro = box.cfg.read_only, ro = box.info.ro }
+		end)
+
+		t.assert_equals(ret2.cfg_ro, true, 'box.cfg.read_only == true (double check)')
+		t.assert_equals(ret2.ro, true, 'box.info.ro == true (double check)')
+	end
+end
diff --git a/test/Dockerfile b/test/Dockerfile
index a3e071c..a5549e2 100644
--- a/test/Dockerfile
+++ b/test/Dockerfile
@@ -1,4 +1,4 @@
-FROM tarantool/tarantool:2.11
+FROM tarantool/tarantool:2.11.1
 RUN apk add --no-cache -u iproute2 make bind-tools

 WORKDIR /opt/tarantool
diff --git a/test/app/conf.lua b/test/app/conf.lua
index d8cd24c..1e53e8d 100644
--- a/test/app/conf.lua
+++ b/test/app/conf.lua
@@ -2,19 +2,8 @@ etcd = {
 	instance_name = os.getenv("TT_INSTANCE_NAME"),
 	prefix = '/instance',
 	endpoints = {"http://etcd0:2379","http://etcd1:2379","http://etcd2:2379"},
-	fencing_enabled = true,
+	fencing_enabled = false,
 	timeout = 2,
-}
-
-box = {
-	background = false,
-	log_level = 6,
-	log_format = 'plain',
-
-	memtx_dir = '/var/lib/tarantool/snaps/',
-	wal_dir = '/var/lib/tarantool/xlogs',
-}
-
-app = {
-
+	login = 'username',
+	password = 'password',
 }
diff --git a/test/app/init.lua b/test/app/init.lua
index 7059c9b..bd23cde 100644
--- a/test/app/init.lua
+++ b/test/app/init.lua
@@ -1,9 +1,9 @@
 local fiber = require "fiber"

 require 'package.reload'
+
 require 'config' {
 	mkdir = true,
-	print_config = true,
 	instance_name = os.getenv("TT_INSTANCE_NAME"),
 	file = 'conf.lua',
 	master_selection_policy = 'etcd.cluster.master',
@@ -31,9 +31,10 @@ fiber.create(function()
 		for _ = 1, 10 do
 			local f = fiber.create(function()
 				fiber.self():set_joinable(true)
-				for i = 1, 100 do
-					box.space.T:replace{i, box.info.id, box.info.vclock}
+				for _ = 1, 10 do
+					box.space.T:insert{box.space.T:len(), box.info.id, box.info.vclock}
 				end
+				fiber.sleep(0.001)
 			end)
 			table.insert(fibers, f)
 		end
diff --git a/test/instance.etcd.yaml b/test/instance.etcd.yaml
index d23f6ce..9261b50 100644
--- a/test/instance.etcd.yaml
+++ b/test/instance.etcd.yaml
@@ -9,18 +9,22 @@ instance:
     fencing_timeout: 10
     fencing_pause: 5
   box:
-    replication_connect_quorum: 1
+    bootstrap_strategy: auto
     log_level: 5
+    replication_connect_timeout: 3
+    listen: 0.0.0.0:3301
     memtx_memory: 268435456
+    memtx_dir: /var/lib/tarantool/snaps/
+    wal_dir: /var/lib/tarantool/xlogs/
 instances:
   instance_01:
     cluster: instance
     box:
       instance_uuid: 91157a11-0000-0001-0000-000000000000
-      listen: instance_01:3301
+      remote_addr: instance_01:3301
   instance_02:
     cluster: instance
     box:
       instance_uuid: 91157a11-0000-0002-0000-000000000000
-      listen: instance_02:3302
+      remote_addr: instance_02:3301
...
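Note: the following is a minimal usage sketch of the deadline-aware listing introduced above, not part of the patch itself; the key path and the time budget are hypothetical, while opts.deadline is real and is clamped against the remaining time inside M:request, as the hunk above shows.

    local fiber  = require 'fiber'
    local config = require 'config'

    -- bound the whole listing, including built-in retries, by one deadline
    -- instead of letting every retry consume a full etcd timeout
    local deadline = fiber.time() + 2 -- e.g. a share of fencing_timeout
    local ok, result, resp = pcall(function()
        return config.etcd:list('/instance/clusters', { deadline = deadline })
    end)
    if not ok then
        -- budget exhausted or etcd error: the caller decides what to do
        -- next (retry, step down, ...); result holds the error message
    end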
From 2568efba3dc934616464b0d1e2b47848a8e0da41 Mon Sep 17 00:00:00 2001
From: Vladislav Grubov
Date: Wed, 13 Sep 2023 09:52:39 +0300
Subject: [PATCH 5/6] ci: add publishing to coveralls.io

---
 .github/workflows/test.yml | 31 +++++++++++++++++++++++--------
 .luacov                    |  4 ++++
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index aded7c7..094017c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -28,19 +28,34 @@
       - uses: docker/setup-buildx-action@v2
       - name: run test suite for ${{matrix.version}}
         run: make test-${{matrix.version}}
+      - name: rename luacov.stats.out
+        run: mv luacov.stats.out luacov.stats.out-${{matrix.version}}
+      - uses: actions/upload-artifact@master
+        with:
+          name: luacov.stats.out
+          path: luacov.stats.out-${{matrix.version}}
   run-coverage-report:
     runs-on: ubuntu-latest
+    needs: ["run-unit-tests"]
     steps:
+      - uses: actions/checkout@master
       - uses: tarantool/setup-tarantool@v2
         with:
           tarantool-version: '2.10.7'
-      - name: install luacov-console 1.1.0
-        run: tarantoolctl rocks install luacov-console 1.1.0
       - name: install luacov-coveralls 0.2.3
-        run: tarantoolctl rocks install --server=http://luarocks.org luacov-coveralls 0.2.3
+        run: tarantoolctl rocks install --server=https://luarocks.org luacov-coveralls 0.2.3
+      - name: install luacov-console 1.2.0
+        run: tarantoolctl rocks --server http://moonlibs.github.io/rocks install luacov-console 1.2.0
+      - uses: actions/download-artifact@master
+        with:
+          name: luacov.stats.out
+      - name: debug
+        run: ls -la .
+      - name: merge luacov.stats.out
+        run: cat luacov.stats.out-* | >luacov.stats.out tarantool -e 'm={} for k in io.lines() do local vs=io.read():split(" ") vs[#vs]=nil local r = m[k] if r then for i, v in pairs(vs) do r[i]=r[i]+v end else m[k]=vs end end; for k, v in pairs(m) do print(k) print(table.concat(v, " ")) end'
       - name: prepare coverage report
-        run: .rocks/bin/luacov-console "$(pwd)" && .rocks/bin/luacov-console -s
-      # - name: publish coveralls report
-      #   env:
-      #     COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
-      #   run: .rocks/bin/luacov-coveralls -v
+        run: .rocks/bin/luacov-console . && .rocks/bin/luacov-console -s
+      - name: publish coveralls report
+        env:
+          COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
+        run: .rocks/bin/luacov-coveralls -v
diff --git a/.luacov b/.luacov
index c03ef6c..130ed1c 100644
--- a/.luacov
+++ b/.luacov
@@ -9,6 +9,10 @@ exclude = {
 	"builtin/",
 }

+pathcorrect = {
+	{ "^/source/config/", "" },
+}
+
 coveralls = {
 	root = "/",
 	debug = true,
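The stats-merging step above is a one-liner; expanded for readability it performs the following (same logic: a luacov.stats.out record is a key line followed by a line of space-separated per-line hit counts, and counts for the same key are summed; string.split is Tarantool's built-in string extension):

    -- merge several luacov.stats.out files fed via stdin:
    --   cat luacov.stats.out-* | tarantool merge.lua >luacov.stats.out
    local m = {}
    for k in io.lines() do              -- key line of the next record
        local vs = io.read():split(" ") -- hit-count line that follows it
        vs[#vs] = nil                   -- drop the trailing empty field
        local r = m[k]
        if r then
            for i, v in pairs(vs) do r[i] = r[i] + v end
        else
            m[k] = vs
        end
    end
    for k, v in pairs(m) do
        print(k)
        print(table.concat(v, " "))
    end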
From b540dd98c1381d297eb6520a90c890bcf8c1592f Mon Sep 17 00:00:00 2001
From: Vladislav Grubov
Date: Tue, 3 Oct 2023 19:12:27 +0300
Subject: [PATCH 6/6] feat: support config.enforce_ro()

* This method is needed mainly by autofailover mechanisms to enforce
  read_only=true during Tarantool's loading phase. Before this patch
  a race condition existed between an autofailover and the
  moonlibs/config recovery behaviour. If the master crashes but
  restarts quickly, it enters a long-running loading phase. The master
  recovers with read_only=true, but after box.cfg returns,
  moonlibs/config retrieves the config from ETCD and rechecks the
  read_only option. The race happens when the autofailover changes the
  configuration in ETCD just as the master returns from the loading
  phase and applies the stale configuration. This leads the cluster to
  split-brain.

  With config.enforce_ro an external coordinator can first call
  enforce_ro on the loading leader and receive confirmation that the
  leader will not be promoted to rw until the next configuration
  reload.

  Tarantool can be enforced to be ro only when all of the following
  conditions are met:
  1) Tarantool is recovering from a snapshot (it was already
     bootstrapped)
  2) Client code does not override box.cfg by passing args.boxcfg
  3) args.tidy_load is enabled (the default; it can be overridden by
     the client)
  4) config uses ETCD to retrieve the topology
---
 config.lua | 27 +++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/config.lua b/config.lua
index d6a9862..5ffb25c 100644
--- a/config.lua
+++ b/config.lua
@@ -665,7 +665,7 @@ local function etcd_load( M, etcd_conf, local_cfg )
 			" with replication:"..table.concat(cfg.box.replication,", "),
 			string.format("timeout: %s, quorum: %s, lag: %s",
 				cfg.box.replication_connect_timeout
-					or ('def:%s'):format(load_cfg.default_cfg.replication_connect_quorum or 30),
+					or ('def:%s'):format(load_cfg.default_cfg.replication_connect_timeout or 30),
 				cfg.box.replication_connect_quorum or 'def:full',
 				cfg.box.replication_sync_lag
 					or ('def:%s'):format(load_cfg.default_cfg.replication_sync_lag or 10)
@@ -751,6 +751,7 @@ end
 ---@field public _load_cfg table
 ---@field public _flat table
 ---@field public _fencing_f? Fiber
+---@field public _enforced_ro? boolean
 ---@operator call(moonlibs.config.opts): moonlibs.config

 ---@type moonlibs.config
@@ -779,7 +780,18 @@
 				return
 			end
 		end
-	end
+	end,
+	enforce_ro = function()
+		if not M._ro_enforcable then
+			return false, 'cannot enforce readonly'
+		end
+		M._enforced_ro = true
+		return true, {
+			info_ro = box.info.ro,
+			cfg_ro = box.cfg.read_only,
+			enforce_ro = M._enforced_ro,
+		}
+	end,
 },{
 	---Reinitiates moonlibs.config
 	---@param args moonlibs.config.opts
@@ -877,6 +889,8 @@

 	-- subject to change, just a PoC
 	local etcd_conf = args.etcd or cfg.etcd
+	-- we can enforce ro during recovery only if we have etcd config
+	M._ro_enforcable = M._ro_enforcable and etcd_conf ~= nil
 	if etcd_conf then
 		local s = clock.time()
 		cfg = etcd_load(M, etcd_conf, cfg)
@@ -912,6 +926,9 @@
 		return cfg
 	end

+	-- We cannot enforce ro if any of these conditions is not satisfied:
+	-- Tarantool must be bootstrapping with tidy_load and must not override box.cfg via args.boxcfg
+	M._ro_enforcable = args.boxcfg == nil and args.tidy_load and type(box.cfg) == 'function'
 	local cfg = load_config() --[[@as table]]

 	M._flat = flatten(cfg)
@@ -1014,6 +1031,12 @@

 			log.info("Reloading config after start")
 			local new_cfg = load_config()
+			if M._enforced_ro then
+				log.info("Enforcing RO (config says ro=%s) as requested", new_cfg.box.read_only)
+				new_cfg.box.read_only = true
+			end
+			M._enforced_ro = nil
+			M._ro_enforcable = false
 			local diff_box = value_diff(cfg.box, new_cfg.box)

 			-- since load_config loads config also for reloading it removes non-dynamic options
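A hypothetical coordinator-side sketch of the new method (the instance address and the net.box transport are illustrative; only config.enforce_ro() itself comes from this patch):

    local netbox = require 'net.box'

    -- while the crashed master is still loading, pin it to read-only
    -- before promoting a replica; the pin holds until its next config reload
    local conn = netbox.connect('instance_01:3301')
    local ok, state = conn:call('config.enforce_ro')
    if ok then
        -- state = { info_ro = ..., cfg_ro = ..., enforce_ro = true }:
        -- safe to promote another instance now
    else
        -- state == 'cannot enforce readonly': recovery has already finished
        -- or enforce_ro is not applicable, so fall back to other checks
    end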