Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BugFix] It is strictly forbidden to use read-write locks inside Intend Lock #48530

Merged
merged 3 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ public void processAlterTable(AlterTableStmt stmt) throws UserException {
OlapTable olapTable = (OlapTable) table;

Locker locker = new Locker();
locker.lockTablesWithIntensiveDbLock(db, Lists.newArrayList(table.getId()), LockType.WRITE);
locker.lockDatabase(db, LockType.WRITE);
try {
if (olapTable.getState() != OlapTableState.NORMAL) {
throw InvalidOlapTableStateException.of(olapTable.getState(), olapTable.getName());
Expand Down Expand Up @@ -646,7 +646,7 @@ public void processAlterTable(AlterTableStmt stmt) throws UserException {
throw new DdlException("Invalid alter operations: " + currentAlterOps);
}
} finally {
locker.unLockTablesWithIntensiveDbLock(db, Lists.newArrayList(table.getId()), LockType.WRITE);
locker.unLockDatabase(db, LockType.WRITE);
}

// the following ops should be done outside db lock. because it contains synchronized create operation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
import com.starrocks.common.FeConstants;
import com.starrocks.common.io.Text;
import com.starrocks.common.util.TimeUtils;
import com.starrocks.common.util.concurrent.lock.LockType;
import com.starrocks.common.util.concurrent.lock.Locker;
import com.starrocks.lake.LakeTable;
import com.starrocks.persist.gson.GsonPostProcessable;
import com.starrocks.persist.gson.GsonUtils;
Expand Down Expand Up @@ -86,7 +84,7 @@ public void setIndexTabletSchema(long indexId, String indexName, SchemaInfo sche

@Override
protected TabletMetadataUpdateAgentTask createTask(PhysicalPartition partition, MaterializedIndex index, long nodeId,
Set<Long> tablets) {
Set<Long> tablets) {
String tag = String.format("%d_%d", partition.getId(), index.getId());
TabletMetadataUpdateAgentTask task = null;
for (IndexSchemaInfo info : schemaInfos) {
Expand All @@ -103,13 +101,7 @@ protected TabletMetadataUpdateAgentTask createTask(PhysicalPartition partition,

@Override
protected void updateCatalog(Database db, LakeTable table) {
Locker locker = new Locker();
locker.lockDatabase(db, LockType.WRITE);
try {
updateCatalogUnprotected(db, table);
} finally {
locker.unLockDatabase(db, LockType.WRITE);
}
updateCatalogUnprotected(db, table);
}

private void updateCatalogUnprotected(Database db, LakeTable table) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.
package com.starrocks.common.util.concurrent.lock;

public class DeadlockException extends IllegalLockStateException {
public class DeadlockException extends LockException {
public DeadlockException(String msg) {
super(msg);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
import java.util.Set;

public abstract class Lock {
public abstract LockGrantType lock(Locker locker, LockType lockType);
public abstract LockGrantType lock(Locker locker, LockType lockType) throws LockException;

public abstract Set<Locker> release(Locker locker, LockType lockType);
public abstract Set<Locker> release(Locker locker, LockType lockType) throws LockException;

public abstract boolean isOwner(Locker locker, LockType lockType);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
// limitations under the License.
package com.starrocks.common.util.concurrent.lock;

public class IllegalLockStateException extends Exception {
public IllegalLockStateException(String msg) {
public class LockException extends Exception {
public LockException(String msg) {
super(msg);
}

public IllegalLockStateException(String msg, Exception e) {
public LockException(String msg, Exception e) {
super(msg, e);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.
package com.starrocks.common.util.concurrent.lock;

public class LockInterruptException extends IllegalLockStateException {
public class LockInterruptException extends LockException {
public LockInterruptException(Exception throwable) {
super("", throwable);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,12 @@ public LockManager() {
* @param locker The Locker to lock this on behalf of.
* @param lockType Then lock type requested
* @param timeout milliseconds to time out after if lock couldn't be obtained. 0 means block indefinitely.
* @throws LockTimeoutException when the transaction time limit was exceeded.
* @throws DeadlockException when deadlock was detected
* @throws LockTimeoutException when the transaction time limit was exceeded.
* @throws DeadlockException when deadlock was detected
* @throws LockInterruptException when catch InterruptedException
* @throws NotSupportLockException when lock request not support, such as request (S or X) lock in (IS or IX) scope
*/

public void lock(long rid, Locker locker, LockType lockType, long timeout)
throws LockInterruptException, LockTimeoutException, DeadlockException {

public void lock(long rid, Locker locker, LockType lockType, long timeout) throws LockException {
final long startTime = System.currentTimeMillis();
locker.setLockRequestTimeMs(startTime);

Expand Down Expand Up @@ -262,7 +261,7 @@ private boolean notifyVictim(Locker targetedVictim, Locker currentLocker, Long r
}
}

public void release(long rid, Locker locker, LockType lockType) {
public void release(long rid, Locker locker, LockType lockType) throws LockException {
Set<Locker> newOwners;

int lockTableIdx = getLockTableIndex(rid);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,8 @@

import com.google.common.base.Strings;

public class LockTimeoutException extends RuntimeException {

public class LockTimeoutException extends LockException {
public LockTimeoutException(String msg) {
super(Strings.nullToEmpty(msg));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public Locker() {
* @throws LockTimeoutException when the transaction time limit was exceeded.
* @throws NotSupportLockException when not support param or operation
*/
public void lock(long rid, LockType lockType, long timeout) throws IllegalLockStateException {
public void lock(long rid, LockType lockType, long timeout) throws LockException {
if (timeout < 0) {
throw new NotSupportLockException("lock timeout value cannot be less than 0");
}
Expand All @@ -84,7 +84,7 @@ public void lock(long rid, LockType lockType, long timeout) throws IllegalLockSt
LOG.debug(this + " | LockManager acquire lock : rid " + rid + ", lock type " + lockType);
}

public void lock(long rid, LockType lockType) throws IllegalLockStateException {
public void lock(long rid, LockType lockType) throws LockException {
this.lock(rid, lockType, 0);
}

Expand All @@ -97,7 +97,11 @@ public void lock(long rid, LockType lockType) throws IllegalLockStateException {
public void release(long rid, LockType lockType) {
LockManager lockManager = GlobalStateMgr.getCurrentState().getLockManager();
LOG.debug(this + " | LockManager release lock : rid " + rid + ", lock type " + lockType);
lockManager.release(rid, this, lockType);
try {
lockManager.release(rid, this, lockType);
} catch (LockException e) {
ErrorReportException.report(ErrorCode.ERR_LOCK_ERROR, e.getMessage());
}
}

// --------------- Database locking API ---------------
Expand All @@ -110,7 +114,7 @@ public void lockDatabase(Database database, LockType lockType) {
Preconditions.checkState(database != null);
try {
lock(database.getId(), lockType, 0);
} catch (IllegalLockStateException e) {
} catch (LockException e) {
ErrorReportException.report(ErrorCode.ERR_LOCK_ERROR, e.getMessage());
}
} else {
Expand Down Expand Up @@ -152,7 +156,7 @@ public boolean tryLockDatabase(Database database, LockType lockType, long timeou
return true;
} catch (LockTimeoutException e) {
return false;
} catch (IllegalLockStateException e) {
} catch (LockException e) {
ErrorReportException.report(ErrorCode.ERR_LOCK_ERROR, e.getMessage());
return false;
}
Expand Down Expand Up @@ -211,6 +215,7 @@ public void unlockDatabases(List<Database> dbs, LockType lockType) {

/**
* Try to lock databases in ascending order of id.
*
* @return: true if all databases are locked successfully, false otherwise.
*/
public boolean tryLockDatabases(List<Database> dbs, LockType lockType, long timeout, TimeUnit unit) {
Expand Down Expand Up @@ -293,7 +298,7 @@ public void lockTablesWithIntensiveDbLock(Database database, List<Long> tableLis
for (Long rid : tableListClone) {
this.lock(rid, lockType, 0);
}
} catch (IllegalLockStateException e) {
} catch (LockException e) {
ErrorReportException.report(ErrorCode.ERR_LOCK_ERROR, e.getMessage());
}
} else {
Expand All @@ -313,7 +318,7 @@ public boolean tryLockTablesWithIntensiveDbLock(Database database, List<Long> ta
} else {
this.lock(database.getId(), LockType.INTENTION_SHARED, timeout);
}
} catch (IllegalLockStateException e) {
} catch (LockException e) {
return false;
}

Expand All @@ -326,7 +331,7 @@ public boolean tryLockTablesWithIntensiveDbLock(Database database, List<Long> ta
}

return true;
} catch (IllegalLockStateException e) {
} catch (LockException e) {
if (lockType == LockType.WRITE) {
release(database.getId(), LockType.INTENTION_EXCLUSIVE);
} else {
Expand Down Expand Up @@ -368,8 +373,9 @@ public void unLockTablesWithIntensiveDbLock(Database database, List<Long> tableL

/**
* Lock database and table with intensive db lock.
*
* @param database database for intensive db lock
* @param table table to be locked
* @param table table to be locked
* @param lockType lock type
* @return true if database exits, false otherwise
*/
Expand Down Expand Up @@ -415,8 +421,9 @@ public void unLockDatabase(Database database, Long tableId, LockType lockType) {

/**
* Lock table with intensive db lock.
*
* @param database db for intensive db lock
* @param tableId table to be locked
* @param tableId table to be locked
* @param lockType lock type
*/
public void lockTableWithIntensiveDbLock(Database database, Long tableId, LockType lockType) {
Expand All @@ -429,7 +436,7 @@ public void lockTableWithIntensiveDbLock(Database database, Long tableId, LockTy
this.lock(database.getId(), LockType.INTENTION_SHARED, 0);
}
this.lock(tableId, lockType, 0);
} catch (IllegalLockStateException e) {
} catch (LockException e) {
ErrorReportException.report(ErrorCode.ERR_LOCK_ERROR, e.getMessage());
}
} else {
Expand All @@ -440,6 +447,7 @@ public void lockTableWithIntensiveDbLock(Database database, Long tableId, LockTy

/**
* Try lock database and table with intensive db lock.
*
* @return try if try lock success, false otherwise.
*/
public boolean tryLockTableWithIntensiveDbLock(Database db, Table table, LockType lockType, long timeout, TimeUnit unit) {
Expand All @@ -448,6 +456,7 @@ public boolean tryLockTableWithIntensiveDbLock(Database db, Table table, LockTyp

/**
* Try lock database and table id with intensive db lock.
*
* @return try if try lock success, false otherwise.
*/
public boolean tryLockTableWithIntensiveDbLock(Database db, Long tableId, LockType lockType, long timeout, TimeUnit unit) {
Expand All @@ -467,6 +476,7 @@ public boolean tryLockTableWithIntensiveDbLock(Database db, Long tableId, LockTy

/**
* Try lock database and tables with intensive db lock.
*
* @return try if try lock success, false otherwise.
*/
public boolean tryLockTableWithIntensiveDbLock(LockParams lockParams, LockType lockType, long timeout, TimeUnit unit) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The most risky bug in this code is:
Catching LockException in the release method might suppress important exceptions and not handle them properly.

You can modify the code like this:

public void release(long rid, LockType lockType) {
    LockManager lockManager = GlobalStateMgr.getCurrentState().getLockManager();
    LOG.debug(this + " | LockManager release lock : rid " + rid + ", lock type " + lockType);
    try {
        lockManager.release(rid, this, lockType);
    } catch (NotSupportLockException e) {
        ErrorReportException.report(ErrorCode.ERR_LOCK_ERROR, e.getMessage());
        throw e;
    } catch (LockTimeoutException e) {
        ErrorReportException.report(ErrorCode.ERR_LOCK_TIMEOUT, e.getMessage());
        throw e;
    } catch (LockException e) {
        ErrorReportException.report(ErrorCode.ERR_LOCK_ERROR, e.getMessage());
        throw e;
    }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public MultiUserLock(LockHolder lockHolder) {
}

@Override
public LockGrantType lock(Locker locker, LockType lockType) {
public LockGrantType lock(Locker locker, LockType lockType) throws LockException {
LockHolder lockHolderRequest = new LockHolder(locker, lockType);
LockGrantType lockGrantType = tryLock(lockHolderRequest);
if (lockGrantType == LockGrantType.NEW) {
Expand All @@ -58,7 +58,7 @@ public LockGrantType lock(Locker locker, LockType lockType) {
return lockGrantType;
}

private LockGrantType tryLock(LockHolder lockHolderRequest) {
private LockGrantType tryLock(LockHolder lockHolderRequest) throws LockException {
if (ownerNum() == 0) {
return LockGrantType.NEW;
}
Expand Down Expand Up @@ -91,10 +91,24 @@ private LockGrantType tryLock(LockHolder lockHolderRequest) {
lockOwner.getLocker(), lockOwner.getLockType(), lockHolderRequest.getLockType(), this);
}

if (lockHolderRequest.getLockType().equals(lockOwner.getLockType())) {
LockType lockOwnerLockType = lockOwner.getLockType();
LockType lockRequestLockType = lockHolderRequest.getLockType();

if (lockRequestLockType.equals(lockOwnerLockType)) {
lockOwner.increaseRefCount();
return LockGrantType.EXISTING;
} else {
/*
* This does not conform to the use of hierarchical locks.
* The outer layer has already obtained the intention lock,
* and the inner layer code should not apply for read-write locks.
*/
if ((lockOwnerLockType == LockType.INTENTION_SHARED || lockOwnerLockType == LockType.INTENTION_EXCLUSIVE)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should mark the lock table with intention or not

&& (lockRequestLockType == LockType.READ || lockRequestLockType == LockType.WRITE)) {
throw new NotSupportLockException("Can't request " + lockRequestLockType
+ " in the scope of " + lockOwnerLockType + ", " + lockOwner.getLocker().getLockerStackTrace());
}

/*
* The same Locker can upgrade or degrade locks when it requests different types of locks
*
Expand Down Expand Up @@ -128,7 +142,7 @@ private LockGrantType tryLock(LockHolder lockHolderRequest) {
}

@Override
public Set<Locker> release(Locker locker, LockType lockType) {
public Set<Locker> release(Locker locker, LockType lockType) throws LockException {
boolean hasOwner = false;
boolean reentrantLock = false;
LockHolder lockHolder = new LockHolder(locker, lockType);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The most risky bug in this code is:
There's potential for an exception to be thrown without being handled, which could propagate and cause the system to crash unexpectedly. Specifically, the NotSupportLockException may be unchecked and could result in a runtime crash if not appropriately documented and anticipated by calling methods.

You can modify the code like this:

public MultiUserLock(LockHolder lockHolder) { }

@Override
public LockGrantType lock(Locker locker, LockType lockType) throws LockException {
    LockHolder lockHolderRequest = new LockHolder(locker, lockType);
    LockGrantType lockGrantType = tryLock(lockHolderRequest);
    if (lockGrantType == LockGrantType.NEW) {
        addHolder(locker, lockType);
    }
    return lockGrantType;
}

private LockGrantType tryLock(LockHolder lockHolderRequest) throws LockException, NotSupportLockException {
    if (ownerNum() == 0) {
        return LockGrantType.NEW;
    }

    for (LockHolder lockOwner : currentOwners()) {
        if (!lockOwner.getLocker().equals(lockHolderRequest.getLocker())) {
            checkCompatibility(lockOwner.getLocker(), lockOwner.getLockType(), lockHolderRequest.getLockType(), this);
        }

        LockType lockOwnerLockType = lockOwner.getLockType();
        LockType lockRequestLockType = lockHolderRequest.getLockType();

        if (lockRequestLockType.equals(lockOwnerLockType)) {
            lockOwner.increaseRefCount();
            return LockGrantType.EXISTING;
        } else {
            if ((lockOwnerLockType == LockType.INTENTION_SHARED || lockOwnerLockType == LockType.INTENTION_EXCLUSIVE)
                    && (lockRequestLockType == LockType.READ || lockRequestLockType == LockType.WRITE)) {
                throw new NotSupportLockException("Can't request " + lockRequestLockType
                        + " in the scope of " + lockOwnerLockType + ", " + lockOwner);
            }

            // Handle upgrading or downgrading locks here
        }
    }
    return LockGrantType.WAIT;
}

@Override
public Set<Locker> release(Locker locker, LockType lockType) throws LockException {
    boolean hasOwner = false;
    boolean reentrantLock = false;
    LockHolder lockHolder = new LockHolder(locker, lockType);

    // Implementation of release logic

    return new HashSet<>();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.
package com.starrocks.common.util.concurrent.lock;

public class NotSupportLockException extends IllegalLockStateException {
public class NotSupportLockException extends LockException {
public NotSupportLockException() {
super("Lock operations under the new framework are currently not supported.");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,13 @@
import com.starrocks.catalog.TabletInvertedIndex;
import com.starrocks.catalog.TabletMeta;
import com.starrocks.common.Config;
import com.starrocks.common.ErrorCode;
import com.starrocks.common.ErrorReportException;
import com.starrocks.common.MetaNotFoundException;
import com.starrocks.common.NotImplementedException;
import com.starrocks.common.Pair;
import com.starrocks.common.UserException;
import com.starrocks.common.util.concurrent.lock.LockTimeoutException;
import com.starrocks.common.util.concurrent.lock.LockType;
import com.starrocks.common.util.concurrent.lock.Locker;
import com.starrocks.lake.LakeTablet;
Expand Down Expand Up @@ -1301,6 +1304,8 @@ public TCommitRemoteTxnResponse commitRemoteTxn(TCommitRemoteTxnRequest request)
status.setError_msgs(Lists.newArrayList(e.getMessage()));
response.setStatus(status);
return response;
} catch (LockTimeoutException e) {
ErrorReportException.report(ErrorCode.ERR_LOCK_ERROR, e.getMessage());
}

TStatus status = new TStatus(TStatusCode.OK);
Expand Down
14 changes: 11 additions & 3 deletions fe/fe-core/src/main/java/com/starrocks/load/OlapDeleteJob.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,14 @@
import com.starrocks.catalog.TabletInvertedIndex;
import com.starrocks.common.Config;
import com.starrocks.common.DdlException;
import com.starrocks.common.ErrorCode;
import com.starrocks.common.ErrorReportException;
import com.starrocks.common.FeConstants;
import com.starrocks.common.MetaNotFoundException;
import com.starrocks.common.Status;
import com.starrocks.common.UserException;
import com.starrocks.common.util.concurrent.MarkedCountDownLatch;
import com.starrocks.common.util.concurrent.lock.LockTimeoutException;
import com.starrocks.common.util.concurrent.lock.LockType;
import com.starrocks.common.util.concurrent.lock.Locker;
import com.starrocks.qe.QueryStateException;
Expand Down Expand Up @@ -387,9 +390,14 @@ TTaskType.REALTIME_PUSH, getTransactionId(),
public boolean commitImpl(Database db, long timeoutMs) throws UserException {
long transactionId = getTransactionId();
GlobalTransactionMgr globalTransactionMgr = GlobalStateMgr.getCurrentState().getGlobalTransactionMgr();
return globalTransactionMgr.commitAndPublishTransaction(db, transactionId, getTabletCommitInfos(),
getTabletFailInfos(), timeoutMs,
new InsertTxnCommitAttachment());
try {
return globalTransactionMgr.commitAndPublishTransaction(db, transactionId, getTabletCommitInfos(),
getTabletFailInfos(), timeoutMs,
new InsertTxnCommitAttachment());
} catch (LockTimeoutException e) {
ErrorReportException.report(ErrorCode.ERR_LOCK_ERROR, e.getMessage());
return false;
}
}

@Override
Expand Down
Loading
Loading