Skip to content

Commit

Permalink
[Fix](full compaction) Full compaction should not do ordered data com…
Browse files Browse the repository at this point in the history
…paction (#44359)


Problem: When a duplicate-key table with the rowset distribution shown
below has already completed cumulative compaction and then undergoes
full compaction, the BE crashes (core dump) on the following check:
Check failed: new_point == Tablet::K_INVALID_CUMULATIVE_POINT || new_point >= _cumulative_point
Unexpected cumulative point: 1087, origin: 2801.
"rowsets": [
"[0-386] 0 DATA NONOVERLAPPING
02000000000198aabe4290f2b0f5f35610c08a233a061892 0",
"[387-387] 0 DELETE OVERLAP_UNKNOWN
0200000000541310ac4d76e7580a708a2823a4d7a4f06090 0",
"[388-388] 0 DELETE OVERLAP_UNKNOWN
0200000000541d76ac4d76e7580a708a2823a4d7a4f06090 0",
"[389-389] 0 DELETE OVERLAP_UNKNOWN
0200000000543b4dac4d76e7580a708a2823a4d7a4f06090 0",
"[390-390] 0 DELETE OVERLAP_UNKNOWN
02000000005453aeac4d76e7580a708a2823a4d7a4f06090 0",
"[391-391] 0 DELETE OVERLAP_UNKNOWN
0200000000546a44ac4d76e7580a708a2823a4d7a4f06090 0",
"[392-392] 0 DELETE OVERLAP_UNKNOWN
02000000005480dbac4d76e7580a708a2823a4d7a4f06090 0",
"[393-393] 0 DELETE OVERLAP_UNKNOWN
0200000000548cb3ac4d76e7580a708a2823a4d7a4f06090 0",
"[394-394] 0 DELETE OVERLAP_UNKNOWN
0200000000549a25ac4d76e7580a708a2823a4d7a4f06090 0",
"[395-395] 0 DELETE OVERLAP_UNKNOWN
020000000054b359ac4d76e7580a708a2823a4d7a4f06090 0",
"[396-396] 0 DELETE OVERLAP_UNKNOWN
020000000054c19dac4d76e7580a708a2823a4d7a4f06090 0",
"[397-397] 0 DELETE OVERLAP_UNKNOWN
020000000054d757ac4d76e7580a708a2823a4d7a4f06090 0",
        ...
"[1085-1085] 0 DELETE OVERLAP_UNKNOWN
02000000002a0b20bd4798638f237008ff42fbca276b52a2 0",
"[1087-1506] 1 DATA NONOVERLAPPING
020000000000047e3b452de14ceaad2e78a87526026d2290 326.10 KB",
"[1087-1506] 1 DATA NONOVERLAPPING
020000000000047e3b452de14ceaad2e78a87526026d2290 326.10 KB",
        ...
"[2800-2800] 0 DELETE OVERLAP_UNKNOWN
02000000002f12d6bd4798638f237008ff42fbca276b52a2 0"

Reason: A duplicate-key table goes through ordered data compaction. With
the rowset distribution shown above, ordered data compaction cuts the
input rowsets, so the full compaction is performed on only part of the
rowsets.

Solution: For full compaction, prohibit ordered data compaction.
  • Loading branch information
Yukang-Lian authored Nov 21, 2024
1 parent 867bd15 commit ca579c1
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 2 deletions.
6 changes: 4 additions & 2 deletions be/src/olap/compaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "io/fs/file_system.h"
#include "io/fs/file_writer.h"
#include "io/fs/remote_file_system.h"
#include "io/io_common.h"
#include "olap/cumulative_compaction_policy.h"
#include "olap/cumulative_compaction_time_series_policy.h"
#include "olap/data_dir.h"
Expand Down Expand Up @@ -345,8 +346,9 @@ bool CompactionMixin::handle_ordered_data_compaction() {
if (!config::enable_ordered_data_compaction) {
return false;
}
if (compaction_type() == ReaderType::READER_COLD_DATA_COMPACTION) {
// The remote file system does not support to link files.
if (compaction_type() == ReaderType::READER_COLD_DATA_COMPACTION ||
compaction_type() == ReaderType::READER_FULL_COMPACTION) {
// The remote file system and full compaction do not support linking files.
return false;
}
if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
Expand Down
3 changes: 3 additions & 0 deletions be/src/olap/full_compaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ Status FullCompaction::prepare_compact() {
std::unique_lock cumu_lock(tablet()->get_cumulative_compaction_lock());
tablet()->set_is_full_compaction_running(true);

DBUG_EXECUTE_IF("FullCompaction.prepare_compact.set_cumu_point",
{ tablet()->set_cumulative_layer_point(tablet()->max_version_unlocked() + 1); })

// 1. pick rowsets to compact
RETURN_IF_ERROR(pick_rowsets_to_compact());

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import org.codehaus.groovy.runtime.IOGroovyMethods

// Regression test: full compaction on a duplicate-key table whose rowsets are
// mostly delete predicates must compact every rowset of each tablet into one.
//
// Phase 1 runs a plain full compaction. Phase 2 repeats it with the BE debug
// point "FullCompaction.prepare_compact.set_cumu_point" enabled, which moves the
// cumulative layer point past the max version before rowsets are picked.
//
// Any failure propagates with its original exception instead of being reduced
// to a bare boolean flag, so the test report shows the real cause.
suite("test_full_compaction_with_ordered_data","nonConcurrent") {
    if (isCloudMode()) {
        return
    }
    def tableName = "test_full_compaction_with_ordered_data"
    def debugPoint = "FullCompaction.prepare_compact.set_cumu_point"
    // Must match the BUCKETS clause of the CREATE TABLE statement below.
    int bucketNum = 3

    sql """ DROP TABLE IF EXISTS ${tableName} """

    def backendId_to_backendIP = [:]
    def backendId_to_backendHttpPort = [:]
    getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);

    // Returns the total number of rowsets reported by the compaction-status
    // URL of every tablet in `tablets`.
    def countRowsets = { tablets ->
        int rowsetCount = 0
        for (def tablet in tablets) {
            def (code, out, err) = curl("GET", tablet.CompactionStatus)
            logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err)
            assertEquals(code, 0)
            def tabletJson = parseJson(out.trim())
            assert tabletJson.rowsets instanceof List
            rowsetCount += ((List<String>) tabletJson.rowsets).size()
        }
        return rowsetCount
    }

    // Asks the owning BE to run a full compaction on every tablet, retrying
    // up to 10 times per tablet until the request is accepted.
    def triggerFullCompaction = { tablets ->
        for (def tablet in tablets) {
            String tabletId = tablet.TabletId
            String backendId = tablet.BackendId
            int times = 1
            // Declared outside the loop body because `out` is read in the loop condition.
            def code, out, err
            do {
                (code, out, err) = be_run_full_compaction(backendId_to_backendIP.get(backendId), backendId_to_backendHttpPort.get(backendId), tabletId)
                logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
                ++times
                sleep(2000)
            } while (parseJson(out.trim()).status.toLowerCase()!="success" && times<=10)
        }
    }

    // Polls the compaction status of every tablet once per second until the
    // BE reports that no compaction is running any more.
    def waitForCompactionDone = { tablets ->
        for (def tablet in tablets) {
            String tabletId = tablet.TabletId
            String backendId = tablet.BackendId
            boolean running = true
            do {
                Thread.sleep(1000)
                def (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backendId), backendId_to_backendHttpPort.get(backendId), tabletId)
                logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
                assertEquals(code, 0)
                def compactionStatus = parseJson(out.trim())
                assertEquals("success", compactionStatus.status.toLowerCase())
                running = compactionStatus.run_status
            } while (running)
        }
    }

    // Checks the rowset count before compaction, runs a full compaction on
    // all tablets, and checks that exactly one rowset per tablet remains.
    def runFullCompactionAndCheck = { int expectedRowsetsPerTablet ->
        def tablets = sql_return_maparray """ show tablets from ${tableName}; """

        def replicaNum = get_table_replica_num(tableName)
        logger.info("get table replica num: " + replicaNum)

        assert (countRowsets(tablets) == expectedRowsetsPerTablet * replicaNum * bucketNum)

        triggerFullCompaction(tablets)
        waitForCompactionDone(tablets)

        // after full compaction, there is only 1 rowset per tablet.
        assert (countRowsets(tablets) == 1 * replicaNum * bucketNum)
    }

    sql """
CREATE TABLE IF NOT EXISTS ${tableName} (
`k` int ,
`v` int ,
) engine=olap
DUPLICATE KEY(k)
DISTRIBUTED BY HASH(k)
BUCKETS 3
properties(
"replication_num" = "1",
"disable_auto_compaction" = "true")
"""
    sql """ INSERT INTO ${tableName} VALUES (0,0),(1,1),(2,2)"""
    sql """ delete from ${tableName} where k=0"""
    sql """ delete from ${tableName} where k=1"""
    sql """ delete from ${tableName} where k=2"""

    // Phase 1: before full compaction there are 5 rowsets per tablet
    // (1 insert + 3 deletes, plus presumably the tablet's initial rowset).
    runFullCompactionAndCheck(5)

    sql """ delete from ${tableName} where k=0"""
    sql """ delete from ${tableName} where k=1"""
    sql """ delete from ${tableName} where k=2"""
    sql """ delete from ${tableName} where k=3"""
    sql """ delete from ${tableName} where k=4"""
    sql """ delete from ${tableName} where k=5"""
    sql """ delete from ${tableName} where k=6"""
    sql """ delete from ${tableName} where k=7"""
    sql """ delete from ${tableName} where k=8"""
    sql """ delete from ${tableName} where k=9"""
    sql """ INSERT INTO ${tableName} VALUES (10,10)"""

    GetDebugPoint().clearDebugPointsForAllBEs()

    // Phase 2: before full compaction there are 12 rowsets per tablet
    // (1 compacted rowset + 10 deletes + 1 insert).
    try {
        GetDebugPoint().enableDebugPointForAllBEs(debugPoint)
        runFullCompactionAndCheck(12)
    } finally {
        // Always switch the debug point off so later suites are not affected.
        GetDebugPoint().disableDebugPointForAllBEs(debugPoint)
    }
}

0 comments on commit ca579c1

Please sign in to comment.