From 074bd7c1f012f98468d89421951a7c0904efd036 Mon Sep 17 00:00:00 2001 From: qiye Date: Fri, 22 Nov 2024 10:13:54 +0800 Subject: [PATCH] [fix](bloom filter)Fix drop column with bloom filter index (#44361) Problem Summary: 1. When dropping a column that has a bloom filter, we modify the bloom filter column info. 2. When replaying the editLog, we rebuild the bloom filter info from the table schema. Related PR: #41369 ### Release note Fix drop column with bloom filter index --- .../doris/alter/SchemaChangeHandler.java | 24 +++++++++++++++- .../test_bloom_filter_drop_column.out | 2 +- .../test_bloom_filter_drop_column.groovy | 28 ++++++++++++++----- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index b7a0fa5cfbc746..6eaf7d5522c96d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -434,9 +434,12 @@ private boolean processDropColumn(DropColumnClause alterClause, OlapTable olapTa // drop bloom filter column Set bfCols = olapTable.getCopiedBfColumns(); if (bfCols != null) { - Set newBfCols = new HashSet<>(); + Set newBfCols = null; for (String bfCol : bfCols) { if (!bfCol.equalsIgnoreCase(dropColName)) { + if (newBfCols == null) { + newBfCols = Sets.newHashSet(); + } newBfCols.add(bfCol); } } @@ -2953,6 +2956,25 @@ public void modifyTableLightSchemaChange(String rawSql, Database db, OlapTable o LOG.info("finished modify table's add or drop or modify columns. 
table: {}, job: {}, is replay: {}", olapTable.getName(), jobId, isReplay); } + // for bloom filter, rebuild bloom filter info by table schema in replay + if (isReplay) { + Set bfCols = olapTable.getCopiedBfColumns(); + if (bfCols != null) { + List columns = olapTable.getBaseSchema(); + Set newBfCols = null; + for (String bfCol : bfCols) { + for (Column column : columns) { + if (column.getName().equalsIgnoreCase(bfCol)) { + if (newBfCols == null) { + newBfCols = Sets.newHashSet(); + } + newBfCols.add(column.getName()); + } + } + } + olapTable.setBloomFilterInfo(newBfCols, olapTable.getBfFpp()); + } + } } public void replayModifyTableLightSchemaChange(TableAddOrDropColumnsInfo info) diff --git a/regression-test/data/bloom_filter_p0/test_bloom_filter_drop_column.out b/regression-test/data/bloom_filter_p0/test_bloom_filter_drop_column.out index 2c6ca8d224b728..14334dfb4b5c48 100644 --- a/regression-test/data/bloom_filter_p0/test_bloom_filter_drop_column.out +++ b/regression-test/data/bloom_filter_p0/test_bloom_filter_drop_column.out @@ -1,6 +1,6 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !select -- -1 1 +1 1 1 -- !select -- 1 \N diff --git a/regression-test/suites/bloom_filter_p0/test_bloom_filter_drop_column.groovy b/regression-test/suites/bloom_filter_p0/test_bloom_filter_drop_column.groovy index f426d4fca10a79..d83c70af30c709 100644 --- a/regression-test/suites/bloom_filter_p0/test_bloom_filter_drop_column.groovy +++ b/regression-test/suites/bloom_filter_p0/test_bloom_filter_drop_column.groovy @@ -21,13 +21,14 @@ suite("test_bloom_filter_drop_column") { sql """CREATE TABLE IF NOT EXISTS ${table_name} ( `a` varchar(150) NULL, - `c1` varchar(10) + `c1` varchar(10), + `c2` varchar(10) ) ENGINE=OLAP DUPLICATE KEY(`a`) DISTRIBUTED BY HASH(`a`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", - "bloom_filter_columns" = "c1", + "bloom_filter_columns" = "c1, c2", "in_memory" = "false", "storage_format" = "V2" )""" @@ -51,12 +52,12 @@ suite("test_bloom_filter_drop_column") { assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish timeout") } - def assertShowCreateTableWithRetry = { tableName, expectedCondition, maxRetries, waitSeconds -> + def assertShowCreateTableWithRetry = { tableName, expectedCondition, contains, maxRetries, waitSeconds -> int attempt = 0 while (attempt < maxRetries) { def res = sql """SHOW CREATE TABLE ${tableName}""" log.info("Attempt ${attempt + 1}: show table: ${res}") - if (res && res.size() > 0 && res[0][1].contains(expectedCondition)) { + if (res && res.size() > 0 && ((contains && res[0][1].contains(expectedCondition)) || (!contains && !res[0][1].contains(expectedCondition)))) { logger.info("Attempt ${attempt + 1}: Condition met.") return } else { @@ -70,21 +71,34 @@ suite("test_bloom_filter_drop_column") { def finalRes = sql """SHOW CREATE TABLE ${tableName}""" log.info("Final attempt: show table: ${finalRes}") assertTrue(finalRes && finalRes.size() > 0, "SHOW CREATE TABLE return empty or null") - 
assertTrue(finalRes[0][1].contains(expectedCondition), "expected\"${expectedCondition}\",actural: ${finalRes[0][1]}") + if (contains) { + assertTrue(finalRes[0][1].contains(expectedCondition), "expected to contain \"${expectedCondition}\", actual: ${finalRes[0][1]}") + } else { + assertTrue(!finalRes[0][1].contains(expectedCondition), "expected not to contain \"${expectedCondition}\", actual: ${finalRes[0][1]}") + } } - sql """INSERT INTO ${table_name} values ('1', '1')""" + sql """INSERT INTO ${table_name} values ('1', '1', '1')""" sql "sync" qt_select """select * from ${table_name} order by a""" + assertShowCreateTableWithRetry(table_name, "\"bloom_filter_columns\" = \"c1, c2\"", true, 3, 30) // drop column c1 sql """ALTER TABLE ${table_name} DROP COLUMN c1""" wait_for_latest_op_on_table_finish(table_name, timeout) sql "sync" // show create table with retry logic - assertShowCreateTableWithRetry(table_name, "\"bloom_filter_columns\" = \"\"", 3, 30) + assertShowCreateTableWithRetry(table_name, "\"bloom_filter_columns\" = \"c2\"", true, 3, 30) + + // drop column c2 + sql """ALTER TABLE ${table_name} DROP COLUMN c2""" + wait_for_latest_op_on_table_finish(table_name, timeout) + sql "sync" + + // show create table with retry logic + assertShowCreateTableWithRetry(table_name, "\"bloom_filter_columns\" = \"\"", false, 3, 30) // add new column c1 sql """ALTER TABLE ${table_name} ADD COLUMN c1 ARRAY"""