From fd2ebacba869cd2eb5add9ce4d64e355a3ae350f Mon Sep 17 00:00:00 2001 From: Jacek Lewandowski Date: Fri, 20 Aug 2021 10:25:19 +0200 Subject: [PATCH] STAR-843: Update dtests for ULID based generation ID --- scrub_test.py | 55 +++++++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/scrub_test.py b/scrub_test.py index 04c09650f4..7e2eee4049 100644 --- a/scrub_test.py +++ b/scrub_test.py @@ -171,23 +171,25 @@ def standalonescrub(self, table, *indexes, acceptable_errors=None): self.launch_standalone_scrub(KEYSPACE, '{}.{}'.format(table, index)) return self.get_sstables(table, indexes) - def increment_generation_by(self, sstable, generation_increment): + def get_latest_generation(self, sstables): """ - Set the generation number for an sstable file name + Get the latest generation ID of the provided sstables """ - return re.sub('(\d(?!\d))\-', lambda x: str(int(x.group(1)) + generation_increment) + '-', sstable) + latest_gen = None + for table_or_index, table_sstables in list(sstables.items()): + gen = max(parse.search('{}-{generation}-{}.{}', s).named['generation'] for s in table_sstables) + latest_gen = gen if latest_gen is None else max([gen, latest_gen]) + return latest_gen - def increase_sstable_generations(self, sstables): + def get_earliest_generation(self, sstables): """ - After finding the number of existing sstables, increase all of the - generations by that amount. + Get the earliest generation ID of the provided sstables """ + earliest_gen = None for table_or_index, table_sstables in list(sstables.items()): - increment_by = len(set(parse.search('{}-{increment_by}-{suffix}.{file_extention}', s).named['increment_by'] for s in table_sstables)) - sstables[table_or_index] = [self.increment_generation_by(s, increment_by) for s in table_sstables] - - logger.debug('sstables after increment {}'.format(str(sstables))) - + gen = min(parse.search('{}-{generation}-{}.{}', s).named['generation'] for s in table_sstables) + earliest_gen = gen if earliest_gen is None else min([gen, earliest_gen]) + return earliest_gen @since('2.2') class TestScrubIndexes(TestHelper): @@ -240,16 +242,15 @@ def test_scrub_static_table(self): initial_sstables = self.flush('users', 'gender_idx', 'state_idx', 'birth_year_idx') scrubbed_sstables = self.scrub('users', 'gender_idx', 'state_idx', 'birth_year_idx') - self.increase_sstable_generations(initial_sstables) - assert initial_sstables == scrubbed_sstables + assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables) users = self.query_users(session) assert initial_users == users # Scrub and check sstables and data again + initial_sstables = scrubbed_sstables scrubbed_sstables = self.scrub('users', 'gender_idx', 'state_idx', 'birth_year_idx') - self.increase_sstable_generations(initial_sstables) - assert initial_sstables == scrubbed_sstables + assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables) users = self.query_users(session) assert initial_users == users @@ -281,8 +282,7 @@ def test_standalone_scrub(self): cluster.stop() scrubbed_sstables = self.standalonescrub('users', 'gender_idx', 'state_idx', 'birth_year_idx') - self.increase_sstable_generations(initial_sstables) - assert initial_sstables == scrubbed_sstables + assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables) cluster.start() session = self.patient_cql_connection(node1) @@ -315,16 +315,14 @@ def test_scrub_collections_table(self): initial_sstables = self.flush('users', 'user_uuids_idx') scrubbed_sstables = self.scrub('users', 'user_uuids_idx') - self.increase_sstable_generations(initial_sstables) - assert initial_sstables == scrubbed_sstables + assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables) users = list(session.execute(("SELECT * from users where uuids contains {some_uuid}").format(some_uuid=_id))) assert initial_users == users + initial_sstables = scrubbed_sstables scrubbed_sstables = self.scrub('users', 'user_uuids_idx') - - self.increase_sstable_generations(initial_sstables) - assert initial_sstables == scrubbed_sstables + assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables) users = list(session.execute(("SELECT * from users where uuids contains {some_uuid}").format(some_uuid=_id))) @@ -377,16 +375,15 @@ def test_nodetool_scrub(self): initial_sstables = self.flush('users') scrubbed_sstables = self.scrub('users') - self.increase_sstable_generations(initial_sstables) - assert initial_sstables == scrubbed_sstables + assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables) users = self.query_users(session) assert initial_users == users # Scrub and check sstables and data again + initial_sstables = scrubbed_sstables scrubbed_sstables = self.scrub('users') - self.increase_sstable_generations(initial_sstables) - assert initial_sstables == scrubbed_sstables + assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables) users = self.query_users(session) assert initial_users == users @@ -418,8 +415,7 @@ def test_standalone_scrub(self): cluster.stop() scrubbed_sstables = self.standalonescrub('users') - self.increase_sstable_generations(initial_sstables) - assert initial_sstables == scrubbed_sstables + assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables) cluster.start() session = self.patient_cql_connection(node1) @@ -447,8 +443,7 @@ def test_standalone_scrub_essential_files_only(self): self.delete_non_essential_sstable_files('users') scrubbed_sstables = self.standalonescrub(table='users', acceptable_errors=["WARN.*Could not recreate or deserialize existing bloom filter, continuing with a pass-through bloom filter but this will significantly impact reads performance"]) - self.increase_sstable_generations(initial_sstables) - assert initial_sstables == scrubbed_sstables + assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables) cluster.start() session = self.patient_cql_connection(node1)