Skip to content

Commit

Permalink
[fix](inverted index) Conduct More Checks on dict_compression
Browse files Browse the repository at this point in the history
  • Loading branch information
zzzxl1993 committed Dec 23, 2024
1 parent f01f759 commit 16a3160
Show file tree
Hide file tree
Showing 9 changed files with 203 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2752,7 +2752,8 @@ private boolean processAddIndex(CreateIndexClause alterClause, OlapTable olapTab
if (column != null) {
indexDef.checkColumn(column, olapTable.getKeysType(),
olapTable.getTableProperty().getEnableUniqueKeyMergeOnWrite(),
disableInvertedIndexV1ForVariant);
olapTable.getInvertedIndexFileStorageFormat(),
disableInvertedIndexV1ForVariant);
indexDef.getColumnUniqueIds().add(column.getUniqueId());
} else {
throw new DdlException("index column does not exist in table. invalid column: " + col);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -599,8 +599,10 @@ public void analyze(Analyzer analyzer) throws UserException {
if (CollectionUtils.isNotEmpty(indexDefs)) {
Set<String> distinct = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
Set<Pair<IndexType, List<String>>> distinctCol = new HashSet<>();
boolean disableInvertedIndexV1ForVariant = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(
new HashMap<>(properties)) == TInvertedIndexFileStorageFormat.V1
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat = PropertyAnalyzer
.analyzeInvertedIndexFileStorageFormat(new HashMap<>(properties));
boolean disableInvertedIndexV1ForVariant =
(invertedIndexFileStorageFormat == TInvertedIndexFileStorageFormat.V1)
&& ConnectContext.get().getSessionVariable().getDisableInvertedIndexV1ForVaraint();
for (IndexDef indexDef : indexDefs) {
indexDef.analyze();
Expand All @@ -611,8 +613,11 @@ public void analyze(Analyzer analyzer) throws UserException {
boolean found = false;
for (Column column : columns) {
if (column.getName().equalsIgnoreCase(indexColName)) {
indexDef.checkColumn(column, getKeysDesc().getKeysType(),
enableUniqueKeyMergeOnWrite, disableInvertedIndexV1ForVariant);
indexDef.checkColumn(column,
getKeysDesc().getKeysType(),
enableUniqueKeyMergeOnWrite,
invertedIndexFileStorageFormat,
disableInvertedIndexV1ForVariant);
found = true;
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;

import com.google.common.base.Strings;
import com.google.common.collect.Lists;
Expand Down Expand Up @@ -218,7 +219,8 @@ public boolean isInvertedIndex() {
}

public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKeyMergeOnWrite,
boolean disableInvertedIndexV1ForVariant) throws AnalysisException {
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat,
boolean disableInvertedIndexV1ForVariant) throws AnalysisException {
if (indexType == IndexType.BITMAP || indexType == IndexType.INVERTED || indexType == IndexType.BLOOMFILTER
|| indexType == IndexType.NGRAM_BF) {
String indexColName = column.getName();
Expand Down Expand Up @@ -251,7 +253,8 @@ public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKe
}

if (indexType == IndexType.INVERTED) {
InvertedIndexUtil.checkInvertedIndexParser(indexColName, colType, properties);
InvertedIndexUtil.checkInvertedIndexParser(indexColName, colType, properties,
invertedIndexFileStorageFormat);
} else if (indexType == IndexType.NGRAM_BF) {
if (colType != PrimitiveType.CHAR && colType != PrimitiveType.VARCHAR
&& colType != PrimitiveType.STRING) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;

import java.util.Arrays;
import java.util.HashMap;
Expand Down Expand Up @@ -116,11 +117,12 @@ public static String getInvertedIndexParserStopwords(Map<String, String> propert
}

public static void checkInvertedIndexParser(String indexColName, PrimitiveType colType,
Map<String, String> properties) throws AnalysisException {
Map<String, String> properties,
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat) throws AnalysisException {
String parser = null;
if (properties != null) {
parser = properties.get(INVERTED_INDEX_PARSER_KEY);
checkInvertedIndexProperties(properties);
checkInvertedIndexProperties(properties, colType, invertedIndexFileStorageFormat);
}

// default is "none" if not set
Expand Down Expand Up @@ -149,7 +151,8 @@ public static void checkInvertedIndexParser(String indexColName, PrimitiveType c
}
}

public static void checkInvertedIndexProperties(Map<String, String> properties) throws AnalysisException {
public static void checkInvertedIndexProperties(Map<String, String> properties, PrimitiveType colType,
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat) throws AnalysisException {
Set<String> allowedKeys = new HashSet<>(Arrays.asList(
INVERTED_INDEX_PARSER_KEY,
INVERTED_INDEX_PARSER_MODE_KEY,
Expand Down Expand Up @@ -226,10 +229,22 @@ public static void checkInvertedIndexProperties(Map<String, String> properties)
+ ", stopWords must be none");
}

if (dictCompression != null && !dictCompression.matches("true|false")) {
throw new AnalysisException(
"Invalid inverted index 'dict_compression' value: "
+ dictCompression + ", dict_compression must be true or false");
if (dictCompression != null) {
if (!colType.isStringType()) {
throw new AnalysisException("dict_compression can only be set for StringType columns. type: "
+ colType);
}

if (!dictCompression.matches("true|false")) {
throw new AnalysisException(
"Invalid inverted index 'dict_compression' value: "
+ dictCompression + ", dict_compression must be true or false");
}

if (invertedIndexFileStorageFormat != TInvertedIndexFileStorageFormat.V3) {
throw new AnalysisException(
"dict_compression can only be set when storage format is V3");
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -643,10 +643,12 @@ public void validate(ConnectContext ctx) {
Set<String> distinct = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
Set<Pair<IndexDef.IndexType, List<String>>> distinctCol = new HashSet<>();
boolean disableInvertedIndexV1ForVariant = false;
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat;
try {
disableInvertedIndexV1ForVariant = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(
new HashMap<>(properties)) == TInvertedIndexFileStorageFormat.V1
&& ConnectContext.get().getSessionVariable().getDisableInvertedIndexV1ForVaraint();
invertedIndexFileStorageFormat = PropertyAnalyzer.analyzeInvertedIndexFileStorageFormat(
new HashMap<>(properties));
disableInvertedIndexV1ForVariant = invertedIndexFileStorageFormat == TInvertedIndexFileStorageFormat.V1
&& ConnectContext.get().getSessionVariable().getDisableInvertedIndexV1ForVaraint();
} catch (Exception e) {
throw new AnalysisException(e.getMessage(), e.getCause());
}
Expand All @@ -662,7 +664,7 @@ public void validate(ConnectContext ctx) {
for (ColumnDefinition column : columns) {
if (column.getName().equalsIgnoreCase(indexColName)) {
indexDef.checkColumn(column, keysType, isEnableMergeOnWrite,
disableInvertedIndexV1ForVariant);
invertedIndexFileStorageFormat, disableInvertedIndexV1ForVariant);
found = true;
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -93,7 +94,9 @@ public IndexDefinition(String name, List<String> cols, String indexTypeName,
* checkColumn
*/
public void checkColumn(ColumnDefinition column, KeysType keysType,
boolean enableUniqueKeyMergeOnWrite, boolean disableInvertedIndexV1ForVariant) throws AnalysisException {
boolean enableUniqueKeyMergeOnWrite,
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat,
boolean disableInvertedIndexV1ForVariant) throws AnalysisException {
if (indexType == IndexType.BITMAP || indexType == IndexType.INVERTED
|| indexType == IndexType.BLOOMFILTER || indexType == IndexType.NGRAM_BF) {
String indexColName = column.getName();
Expand Down Expand Up @@ -129,7 +132,8 @@ public void checkColumn(ColumnDefinition column, KeysType keysType,
if (indexType == IndexType.INVERTED) {
try {
InvertedIndexUtil.checkInvertedIndexParser(indexColName,
colType.toCatalogDataType().getPrimitiveType(), properties);
colType.toCatalogDataType().getPrimitiveType(), properties,
invertedIndexFileStorageFormat);
} catch (Exception ex) {
throw new AnalysisException("invalid INVERTED index:" + ex.getMessage(), ex);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;

import com.google.common.collect.Lists;
import org.junit.Assert;
Expand Down Expand Up @@ -67,7 +68,8 @@ public void testAnalyzeExpection() throws AnalysisException {
def = new IndexDef("variant_index", false, Lists.newArrayList("col1"),
IndexDef.IndexType.INVERTED, null, "comment");
boolean isIndexFormatV1 = true;
def.checkColumn(new Column("col1", PrimitiveType.VARIANT), KeysType.UNIQUE_KEYS, true, isIndexFormatV1);
def.checkColumn(new Column("col1", PrimitiveType.VARIANT), KeysType.UNIQUE_KEYS, true, null,
isIndexFormatV1);
Assert.fail("No exception throws.");
} catch (AnalysisException e) {
Assert.assertTrue(e instanceof AnalysisException);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

package org.apache.doris.analysis;

import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
Expand All @@ -31,11 +33,13 @@ public void testCheckInvertedIndexProperties() throws AnalysisException {
Map<String, String> properties = new HashMap<>();
properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, "true");

InvertedIndexUtil.checkInvertedIndexProperties(properties);
PrimitiveType colType = PrimitiveType.STRING;
TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat = TInvertedIndexFileStorageFormat.V3;
InvertedIndexUtil.checkInvertedIndexProperties(properties, colType, invertedIndexFileStorageFormat);

properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, "invalid_value");
try {
InvertedIndexUtil.checkInvertedIndexProperties(properties);
InvertedIndexUtil.checkInvertedIndexProperties(properties, colType, invertedIndexFileStorageFormat);
Assertions.fail("Expected AnalysisException was not thrown");
} catch (AnalysisException e) {
Assertions.assertEquals(
Expand All @@ -44,4 +48,146 @@ public void testCheckInvertedIndexProperties() throws AnalysisException {
e.getMessage());
}
}

@Test
public void testDictCompressionValidTrue() throws AnalysisException {
Map<String, String> properties = new HashMap<>();
properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, "true");

PrimitiveType colType = PrimitiveType.STRING;
TInvertedIndexFileStorageFormat storageFormat = TInvertedIndexFileStorageFormat.V3;

InvertedIndexUtil.checkInvertedIndexProperties(properties, colType, storageFormat);
}

@Test
public void testDictCompressionValidFalse() throws AnalysisException {
Map<String, String> properties = new HashMap<>();
properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, "false");

PrimitiveType colType = PrimitiveType.STRING;
TInvertedIndexFileStorageFormat storageFormat = TInvertedIndexFileStorageFormat.V3;

InvertedIndexUtil.checkInvertedIndexProperties(properties, colType, storageFormat);
}

@Test
public void testDictCompressionNonStringType() {
Map<String, String> properties = new HashMap<>();
properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, "true");

PrimitiveType colType = PrimitiveType.INT;
TInvertedIndexFileStorageFormat storageFormat = TInvertedIndexFileStorageFormat.V3;

AnalysisException thrown = Assertions.assertThrows(AnalysisException.class, () -> {
InvertedIndexUtil.checkInvertedIndexProperties(properties, colType, storageFormat);
});

Assertions.assertEquals(
"errCode = 2, detailMessage = dict_compression can only be set for StringType columns. type: INT",
thrown.getMessage()
);
}

@Test
public void testDictCompressionInvalidStorageFormat() {
Map<String, String> properties = new HashMap<>();
properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, "true");

PrimitiveType colType = PrimitiveType.STRING;
TInvertedIndexFileStorageFormat storageFormat = TInvertedIndexFileStorageFormat.V2;

AnalysisException thrown = Assertions.assertThrows(AnalysisException.class, () -> {
InvertedIndexUtil.checkInvertedIndexProperties(properties, colType, storageFormat);
});

Assertions.assertEquals(
"errCode = 2, detailMessage = dict_compression can only be set when storage format is V3",
thrown.getMessage()
);
}

@Test
public void testDictCompressionInvalidValue() {
Map<String, String> properties = new HashMap<>();
properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, "invalid_value");

PrimitiveType colType = PrimitiveType.STRING;
TInvertedIndexFileStorageFormat storageFormat = TInvertedIndexFileStorageFormat.V3;

AnalysisException thrown = Assertions.assertThrows(AnalysisException.class, () -> {
InvertedIndexUtil.checkInvertedIndexProperties(properties, colType, storageFormat);
});

Assertions.assertEquals(
"errCode = 2, detailMessage = Invalid inverted index 'dict_compression' value: invalid_value, "
+ "dict_compression must be true or false",
thrown.getMessage()
);
}

@Test
public void testDictCompressionCaseSensitivity() throws AnalysisException {
Map<String, String> properties = new HashMap<>();
properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, "True");

PrimitiveType colType = PrimitiveType.STRING;
TInvertedIndexFileStorageFormat storageFormat = TInvertedIndexFileStorageFormat.V3;

AnalysisException thrown = Assertions.assertThrows(AnalysisException.class, () -> {
InvertedIndexUtil.checkInvertedIndexProperties(properties, colType, storageFormat);
});

Assertions.assertEquals(
"errCode = 2, detailMessage = Invalid inverted index 'dict_compression' value: True, "
+ "dict_compression must be true or false",
thrown.getMessage()
);
}

@Test
public void testDictCompressionNullValue() throws AnalysisException {
Map<String, String> properties = new HashMap<>();

PrimitiveType colType = PrimitiveType.STRING;
TInvertedIndexFileStorageFormat storageFormat = TInvertedIndexFileStorageFormat.V3;

InvertedIndexUtil.checkInvertedIndexProperties(properties, colType, storageFormat);
}

@Test
public void testDictCompressionWhenStorageFormatIsNull() {
Map<String, String> properties = new HashMap<>();
properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, "true");

PrimitiveType colType = PrimitiveType.STRING;
TInvertedIndexFileStorageFormat storageFormat = null;

AnalysisException thrown = Assertions.assertThrows(AnalysisException.class, () -> {
InvertedIndexUtil.checkInvertedIndexProperties(properties, colType, storageFormat);
});

Assertions.assertEquals(
"errCode = 2, detailMessage = dict_compression can only be set when storage format is V3",
thrown.getMessage()
);
}

@Test
public void testDictCompressionWithNonV3AndValidValue() {
Map<String, String> properties = new HashMap<>();
properties.put(InvertedIndexUtil.INVERTED_INDEX_DICT_COMPRESSION_KEY, "false");

PrimitiveType colType = PrimitiveType.STRING;
TInvertedIndexFileStorageFormat storageFormat = TInvertedIndexFileStorageFormat.V2;

AnalysisException thrown = Assertions.assertThrows(AnalysisException.class, () -> {
InvertedIndexUtil.checkInvertedIndexProperties(properties, colType, storageFormat);
});

Assertions.assertEquals(
"errCode = 2, detailMessage = dict_compression can only be set when storage format is V3",
thrown.getMessage()
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.doris.nereids.trees.plans.commands.info.ColumnDefinition;
import org.apache.doris.nereids.trees.plans.commands.info.IndexDefinition;
import org.apache.doris.nereids.types.VariantType;
import org.apache.doris.thrift.TInvertedIndexFileStorageFormat;

import com.google.common.collect.Lists;
import org.junit.jupiter.api.Assertions;
Expand All @@ -36,7 +37,7 @@ void testVariantIndexFormatV1() throws AnalysisException {
try {
boolean isIndexFormatV1 = true;
def.checkColumn(new ColumnDefinition("col1", VariantType.INSTANCE, false, AggregateType.NONE, true,
null, "comment"), KeysType.UNIQUE_KEYS, true, isIndexFormatV1);
null, "comment"), KeysType.UNIQUE_KEYS, true, null, isIndexFormatV1);
Assertions.fail("No exception throws.");
} catch (AnalysisException e) {
Assertions.assertTrue(e instanceof AnalysisException);
Expand Down

0 comments on commit 16a3160

Please sign in to comment.