Skip to content

Commit

Permalink
[fix](ES Catalog)Only like on keyword can be applied to wildcard query (
Browse files Browse the repository at this point in the history
  • Loading branch information
qidaye authored Nov 8, 2024
1 parent 71694f5 commit e6b5108
Show file tree
Hide file tree
Showing 9 changed files with 299 additions and 86 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ public class EsTable extends Table {
// Periodically pull es metadata
private EsMetaStateTracker esMetaStateTracker;

// column name -> elasticsearch field data type
private Map<String, String> column2typeMap = new HashMap<>();

public EsTable() {
super(TableType.ELASTICSEARCH);
}
Expand Down Expand Up @@ -340,6 +343,6 @@ public void syncTableMetaData() {
}

public List<Column> genColumnsFromEs() {
return EsUtil.genColumnsFromEs(client, indexName, mappingType, false);
return EsUtil.genColumnsFromEs(client, indexName, mappingType, false, column2typeMap);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,17 @@
import org.apache.doris.thrift.TTableDescriptor;
import org.apache.doris.thrift.TTableType;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* Elasticsearch external table.
*/
public class EsExternalTable extends ExternalTable {

private EsTable esTable;
private Map<String, String> column2typeMap = new HashMap<>();

/**
* Create elasticsearch external table.
Expand Down Expand Up @@ -78,9 +81,11 @@ public TTableDescriptor toThrift() {
@Override
public List<Column> initSchema() {
EsRestClient restClient = ((EsExternalCatalog) catalog).getEsRestClient();
return EsUtil.genColumnsFromEs(restClient, name, null, ((EsExternalCatalog) catalog).enableMappingEsId());
return EsUtil.genColumnsFromEs(restClient, name, null, ((EsExternalCatalog) catalog).enableMappingEsId(),
column2typeMap);
}


private EsTable toEsTable() {
List<Column> schema = getFullSchema();
EsExternalCatalog esCatalog = (EsExternalCatalog) catalog;
Expand All @@ -98,6 +103,7 @@ private EsTable toEsTable() {
esTable.setHosts(String.join(",", esCatalog.getNodes()));
esTable.syncTableMetaData();
esTable.setIncludeHiddenIndex(esCatalog.enableIncludeHiddenIndex());
esTable.setColumn2typeMap(column2typeMap);
return esTable;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -189,18 +189,18 @@ public static ObjectNode getMappingProps(String sourceIndex, String indexMapping
* Add mappingEsId config in es external catalog.
**/
public static List<Column> genColumnsFromEs(EsRestClient client, String indexName, String mappingType,
boolean mappingEsId) {
boolean mappingEsId, Map<String, String> column2typeMap) {
String mapping = client.getMapping(indexName);
ObjectNode mappings = getMapping(mapping);
// Get array_fields while removing _meta property.
List<String> arrayFields = new ArrayList<>();
ObjectNode rootSchema = getRootSchema(mappings, mappingType, arrayFields);
return genColumnsFromEs(indexName, mappingType, rootSchema, mappingEsId, arrayFields);
return genColumnsFromEs(indexName, mappingType, rootSchema, mappingEsId, arrayFields, column2typeMap);
}

@VisibleForTesting
public static List<Column> genColumnsFromEs(String indexName, String mappingType, ObjectNode rootSchema,
boolean mappingEsId, List<String> arrayFields) {
boolean mappingEsId, List<String> arrayFields, Map<String, String> column2typeMap) {
List<Column> columns = new ArrayList<>();
if (mappingEsId) {
Column column = new Column();
Expand All @@ -220,7 +220,8 @@ public static List<Column> genColumnsFromEs(String indexName, String mappingType
while (iterator.hasNext()) {
String fieldName = iterator.next();
ObjectNode fieldValue = (ObjectNode) mappingProps.get(fieldName);
Column column = parseEsField(fieldName, replaceFieldAlias(mappingProps, fieldValue), arrayFields);
Column column = parseEsField(fieldName, replaceFieldAlias(mappingProps, fieldValue), arrayFields,
column2typeMap);
columns.add(column);
}
return columns;
Expand All @@ -245,7 +246,8 @@ private static ObjectNode replaceFieldAlias(ObjectNode mappingProps, ObjectNode
return fieldValue;
}

private static Column parseEsField(String fieldName, ObjectNode fieldValue, List<String> arrayFields) {
private static Column parseEsField(String fieldName, ObjectNode fieldValue, List<String> arrayFields,
Map<String, String> column2typeMap) {
Column column = new Column();
column.setName(fieldName);
column.setIsKey(true);
Expand All @@ -256,6 +258,7 @@ private static Column parseEsField(String fieldName, ObjectNode fieldValue, List
if (fieldValue.has("type")) {
String typeStr = fieldValue.get("type").asText();
column.setComment("Elasticsearch type is " + typeStr);
column2typeMap.put(fieldName, typeStr);
// reference https://www.elastic.co/guide/en/elasticsearch/reference/8.3/sql-data-types.html
switch (typeStr) {
case "null":
Expand Down Expand Up @@ -298,15 +301,17 @@ private static Column parseEsField(String fieldName, ObjectNode fieldValue, List
type = ScalarType.createStringType();
break;
case "nested":
case "object":
type = Type.JSONB;
break;
default:
type = Type.UNSUPPORTED;
}
} else {
// When there is no explicit type in mapping, it indicates this type is an `object` in Elasticsearch.
// reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/object.html
type = Type.JSONB;
column.setComment("Elasticsearch no type");
column.setComment("Elasticsearch type is object");
column2typeMap.put(fieldName, "object");
}
if (arrayFields.contains(fieldName)) {
column.setType(ArrayType.create(type, true));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,14 @@ public final class QueryBuilders {
* Generate dsl from compound expr.
**/
private static QueryBuilder toCompoundEsDsl(Expr expr, List<Expr> notPushDownList,
Map<String, String> fieldsContext, BuilderOptions builderOptions) {
Map<String, String> fieldsContext, BuilderOptions builderOptions, Map<String, String> column2typeMap) {
CompoundPredicate compoundPredicate = (CompoundPredicate) expr;
switch (compoundPredicate.getOp()) {
case AND: {
QueryBuilder left = toEsDsl(compoundPredicate.getChild(0), notPushDownList, fieldsContext,
builderOptions);
builderOptions, column2typeMap);
QueryBuilder right = toEsDsl(compoundPredicate.getChild(1), notPushDownList, fieldsContext,
builderOptions);
builderOptions, column2typeMap);
if (left != null && right != null) {
return QueryBuilders.boolQuery().must(left).must(right);
}
Expand All @@ -86,9 +86,9 @@ private static QueryBuilder toCompoundEsDsl(Expr expr, List<Expr> notPushDownLis
case OR: {
int beforeSize = notPushDownList.size();
QueryBuilder left = toEsDsl(compoundPredicate.getChild(0), notPushDownList, fieldsContext,
builderOptions);
builderOptions, column2typeMap);
QueryBuilder right = toEsDsl(compoundPredicate.getChild(1), notPushDownList, fieldsContext,
builderOptions);
builderOptions, column2typeMap);
int afterSize = notPushDownList.size();
if (left != null && right != null) {
return QueryBuilders.boolQuery().should(left).should(right);
Expand All @@ -101,7 +101,7 @@ private static QueryBuilder toCompoundEsDsl(Expr expr, List<Expr> notPushDownLis
}
case NOT: {
QueryBuilder child = toEsDsl(compoundPredicate.getChild(0), notPushDownList, fieldsContext,
builderOptions);
builderOptions, column2typeMap);
if (child != null) {
return QueryBuilders.boolQuery().mustNot(child);
}
Expand All @@ -122,10 +122,10 @@ private static Expr exprWithoutCast(Expr expr) {
return expr;
}

public static QueryBuilder toEsDsl(Expr expr) {
public static QueryBuilder toEsDsl(Expr expr, Map<String, String> column2typeMap) {
return toEsDsl(expr, new ArrayList<>(), new HashMap<>(),
BuilderOptions.builder().likePushDown(Boolean.parseBoolean(EsResource.LIKE_PUSH_DOWN_DEFAULT_VALUE))
.build());
.build(), column2typeMap);
}

private static TExprOpcode flipOpCode(TExprOpcode opCode) {
Expand Down Expand Up @@ -185,32 +185,44 @@ private static QueryBuilder parseIsNullPredicate(Expr expr, String column) {
return QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(column));
}

private static QueryBuilder parseLikePredicate(Expr expr, String column) {
LikePredicate likePredicate = (LikePredicate) expr;
if (likePredicate.getOp().equals(Operator.LIKE)) {
char[] chars = likePredicate.getChild(1).getStringValue().toCharArray();
// example of translation :
// abc_123 ===> abc?123
// abc%ykz ===> abc*123
// %abc123 ===> *abc123
// _abc123 ===> ?abc123
// \\_abc1 ===> \\_abc1
// abc\\_123 ===> abc\\_123
// abc\\%123 ===> abc\\%123
// NOTE. user must input sql like 'abc\\_123' or 'abc\\%ykz'
for (int i = 0; i < chars.length; i++) {
if (chars[i] == '_' || chars[i] == '%') {
if (i == 0) {
chars[i] = (chars[i] == '_') ? '?' : '*';
} else if (chars[i - 1] != '\\') {
chars[i] = (chars[i] == '_') ? '?' : '*';
}
}
private static QueryBuilder parseLikeExpression(Expr expr, String column) {
String pattern;
if (expr instanceof LikePredicate) {
LikePredicate likePredicate = (LikePredicate) expr;
if (!likePredicate.getOp().equals(Operator.LIKE)) {
return QueryBuilders.wildcardQuery(column, likePredicate.getChild(1).getStringValue());
}
pattern = likePredicate.getChild(1).getStringValue();
} else if (expr instanceof FunctionCallExpr) {
FunctionCallExpr functionCallExpr = (FunctionCallExpr) expr;
String fnName = functionCallExpr.getFnName().getFunction();
if (!fnName.equalsIgnoreCase("like")) {
return QueryBuilders.wildcardQuery(column, functionCallExpr.getChild(1).getStringValue());
}
return QueryBuilders.wildcardQuery(column, new String(chars));
pattern = functionCallExpr.getChild(1).getStringValue();
} else {
return QueryBuilders.wildcardQuery(column, likePredicate.getChild(1).getStringValue());
throw new IllegalArgumentException("Unsupported expression type");
}
char[] chars = pattern.toCharArray();
// example of translation :
// abc_123 ===> abc?123
// abc%ykz ===> abc*123
// %abc123 ===> *abc123
// _abc123 ===> ?abc123
// \\_abc1 ===> \\_abc1
// abc\\_123 ===> abc\\_123
// abc\\%123 ===> abc\\%123
// NOTE. user must input sql like 'abc\\_123' or 'abc\\%ykz'
for (int i = 0; i < chars.length; i++) {
if (chars[i] == '_' || chars[i] == '%') {
if (i == 0) {
chars[i] = (chars[i] == '_') ? '?' : '*';
} else if (chars[i - 1] != '\\') {
chars[i] = (chars[i] == '_') ? '?' : '*';
}
}
}
return QueryBuilders.wildcardQuery(column, new String(chars));
}

private static QueryBuilder parseInPredicate(Expr expr, String column, boolean needDateCompat) {
Expand Down Expand Up @@ -257,18 +269,18 @@ private static String getColumnFromExpr(Expr expr) {
* Doris expr to es dsl.
**/
public static QueryBuilder toEsDsl(Expr expr, List<Expr> notPushDownList, Map<String, String> fieldsContext,
BuilderOptions builderOptions) {
BuilderOptions builderOptions, Map<String, String> column2typeMap) {
if (expr == null) {
return null;
}
// esquery functionCallExpr will be rewritten to castExpr in where clause rewriter,
// so we get the functionCallExpr here.
if (expr instanceof CastExpr) {
return toEsDsl(expr.getChild(0), notPushDownList, fieldsContext, builderOptions);
return toEsDsl(expr.getChild(0), notPushDownList, fieldsContext, builderOptions, column2typeMap);
}
// CompoundPredicate, `between` also converted to CompoundPredicate.
if (expr instanceof CompoundPredicate) {
return toCompoundEsDsl(expr, notPushDownList, fieldsContext, builderOptions);
return toCompoundEsDsl(expr, notPushDownList, fieldsContext, builderOptions, column2typeMap);
}
TExprOpcode opCode = expr.getOpcode();
boolean isFlip = false;
Expand All @@ -287,6 +299,7 @@ public static QueryBuilder toEsDsl(Expr expr, List<Expr> notPushDownList, Map<St
return null;
}

String type = column2typeMap.get(column);
// Check whether the date type need compat, it must before keyword replace.
List<String> needCompatDateFields = builderOptions.getNeedCompatDateFields();
boolean needDateCompat = needCompatDateFields != null && needCompatDateFields.contains(column);
Expand All @@ -313,24 +326,29 @@ public static QueryBuilder toEsDsl(Expr expr, List<Expr> notPushDownList, Map<St
return parseIsNullPredicate(expr, column);
}
if (expr instanceof LikePredicate) {
if (!builderOptions.isLikePushDown()) {
if (builderOptions.isLikePushDown() && "keyword".equals(type)) {
// only keyword can apply wildcard query
return parseLikeExpression(expr, column);
} else {
notPushDownList.add(expr);
return null;
} else {
return parseLikePredicate(expr, column);
}
}
if (expr instanceof InPredicate) {
return parseInPredicate(expr, column, needDateCompat);
}
if (expr instanceof FunctionCallExpr) {
FunctionCallExpr functionCallExpr = (FunctionCallExpr) expr;
// current only esquery functionCallExpr can be push down to ES
if (!"esquery".equals(functionCallExpr.getFnName().getFunction())) {
// current only esquery and like applied in keyword functionCallExpr can be push down to ES
String fnName = ((FunctionCallExpr) expr).getFnName().getFunction();
if ("esquery".equals(fnName)) {
return parseFunctionCallExpr(expr);
} else if (builderOptions.isLikePushDown() && "like".equalsIgnoreCase(fnName) && "keyword".equals(type)) {
return parseLikeExpression(expr, column);
} else if (builderOptions.isLikePushDown() && "regexp".equalsIgnoreCase(fnName)) {
return parseLikeExpression(expr, column);
} else {
notPushDownList.add(expr);
return null;
} else {
return parseFunctionCallExpr(expr);
}
}
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,10 +350,14 @@ private void buildQuery() throws UserException {
boolean hasFilter = false;
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
List<Expr> notPushDownList = new ArrayList<>();
if (table.getColumn2typeMap() == null) {
table.genColumnsFromEs();
}
for (Expr expr : conjuncts) {
QueryBuilder queryBuilder = QueryBuilders.toEsDsl(expr, notPushDownList, fieldsContext,
BuilderOptions.builder().likePushDown(table.isLikePushDown())
.needCompatDateFields(table.needCompatDateFields()).build());
.needCompatDateFields(table.needCompatDateFields()).build(),
table.getColumn2typeMap());
if (queryBuilder != null) {
hasFilter = true;
boolQueryBuilder.must(queryBuilder);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

/**
Expand Down Expand Up @@ -221,7 +222,7 @@ public void testEs8Mapping() throws IOException, URISyntaxException {
public void testDateType() throws IOException, URISyntaxException {
ObjectNode testDateFormat = EsUtil.getRootSchema(
EsUtil.getMapping(loadJsonFromFile("data/es/test_date_format.json")), null, new ArrayList<>());
List<Column> parseColumns = EsUtil.genColumnsFromEs("test_date_format", null, testDateFormat, false, new ArrayList<>());
List<Column> parseColumns = EsUtil.genColumnsFromEs("test_date_format", null, testDateFormat, false, new ArrayList<>(), new HashMap<>());
Assertions.assertEquals(8, parseColumns.size());
for (Column column : parseColumns) {
String name = column.getName();
Expand Down Expand Up @@ -254,7 +255,7 @@ public void testDateType() throws IOException, URISyntaxException {
public void testFieldAlias() throws IOException, URISyntaxException {
ObjectNode testFieldAlias = EsUtil.getRootSchema(
EsUtil.getMapping(loadJsonFromFile("data/es/test_field_alias.json")), null, new ArrayList<>());
List<Column> parseColumns = EsUtil.genColumnsFromEs("test_field_alias", null, testFieldAlias, true, new ArrayList<>());
List<Column> parseColumns = EsUtil.genColumnsFromEs("test_field_alias", null, testFieldAlias, true, new ArrayList<>(), new HashMap<>());
Assertions.assertEquals("datetimev2(0)", parseColumns.get(2).getType().toSql());
Assertions.assertEquals("text", parseColumns.get(4).getType().toSql());
}
Expand All @@ -264,7 +265,7 @@ public void testComplexType() throws IOException, URISyntaxException {
ObjectNode testFieldAlias = EsUtil.getRootSchema(
EsUtil.getMapping(loadJsonFromFile("data/es/es6_dynamic_complex_type.json")), null, new ArrayList<>());
List<Column> columns = EsUtil.genColumnsFromEs("test_complex_type", "complex_type", testFieldAlias, true,
new ArrayList<>());
new ArrayList<>(), new HashMap<>());
Assertions.assertEquals(3, columns.size());
}

Expand Down
Loading

0 comments on commit e6b5108

Please sign in to comment.