Skip to content

Commit

Permalink
HIVE-28408: Support ARRAY field access in CBO
Browse files Browse the repository at this point in the history
  • Loading branch information
ramesh0201 committed Dec 12, 2024
1 parent dc28391 commit ef2d276
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
Expand Down Expand Up @@ -73,11 +72,9 @@
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter;
Expand All @@ -92,6 +89,7 @@
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
import org.apache.hadoop.hive.ql.util.DirectionUtils;
import org.apache.hadoop.hive.ql.util.NullOrdering;
Expand Down Expand Up @@ -1078,6 +1076,9 @@ public ASTNode visitCall(RexCall call) {
}
// fall-through
default:
if (op.equals(RexNodeExprFactory.COMPONENT_ACCESS)) {
return call.operands.get(0).accept(this);
}
for (RexNode operand : call.operands) {
astNodeLst.add(operand.accept(this));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.time.Instant;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
Expand All @@ -39,8 +40,11 @@
import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlCollation;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlIntervalQualifier;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlQuantifyOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;
Expand Down Expand Up @@ -613,6 +617,14 @@ protected RexNode createConstantExpr(TypeInfo typeInfo, Object constantValue)
TypeConverter.convert(typeInfo, rexBuilder.getTypeFactory()), false);
}

/**
* Marker operator used to re-type a collection expression as its component (element) type so
* that nested field access can be resolved on it.
*
* <p>{@code createNestedColumnRefExpr} wraps a non-struct expression whose type has a component
* type in a call to this operator, passing the component type explicitly as the call's type,
* and then resolves the field on the wrapped expression.
*
* <p>The wrapper carries no runtime meaning of its own: the {@code visitCall} that converts
* calls to {@code ASTNode}s checks for this operator and returns the conversion of the first
* operand in its place.
*
* <p>NOTE(review): the three {@code null} arguments are presumably the return-type inference,
* operand-type inference and operand-type checker of the {@code SqlFunction} constructor;
* confirm against the Calcite version in use.
*/
public static final SqlOperator COMPONENT_ACCESS =
new SqlFunction("COMPONENT_ACCESS", SqlKind.OTHER_FUNCTION, null,
null, null, SqlFunctionCategory.SYSTEM);

/**
* {@inheritDoc}
*/
Expand All @@ -622,11 +634,13 @@ protected RexNode createNestedColumnRefExpr(
if (expr.getType().isStruct()) {
// regular case of accessing nested field in a column
return rexBuilder.makeFieldAccess(expr, fieldName, true);
} else if (expr.getType().getComponentType() != null){
RexNode wrap =
rexBuilder.makeCall(expr.getType().getComponentType(), COMPONENT_ACCESS, Collections.singletonList(expr));
return createNestedColumnRefExpr(typeInfo, wrap, fieldName, isList);
} else {
// This may happen for schema-less tables, where columns are dynamically
// supplied by serdes.
throw new CalciteSemanticException("Unexpected rexnode : "
+ expr.getClass().getCanonicalName(), UnsupportedFeature.Schema_less_table);
// Defensive exception; ideally this branch is never reached.
throw new CalciteSemanticException("Unexpected rexnode : " + expr.getClass().getCanonicalName());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1966,15 +1966,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: nested_tbl_1_n1
Pruned Column Paths: s1.f6, s5.f16
Pruned Column Paths: s5.f16, s1.f6
Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s5 (type: struct<f16:array<struct<f17:string,f18:struct<f19:int>>>>)
outputColumnNames: s1, s5
expressions: s5.f16.f18.f19 (type: array<int>), s1.f6 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 3196 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(s1.f6)
keys: s5.f16.f18.f19 (type: array<int>)
aggregations: count(_col1)
keys: _col0 (type: array<int>)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
Expand Down Expand Up @@ -2153,12 +2153,12 @@ STAGE PLANS:
Pruned Column Paths: s1.f6
Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s1 (type: struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s6 (type: map<string,struct<f20:array<struct<f21:struct<f22:int>>>>>)
outputColumnNames: s1, s6
expressions: s6['key1'].f20.f21.f22 (type: array<int>), s1.f6 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 2012 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(s1.f6)
keys: s6['key1'].f20.f21.f22 (type: array<int>)
aggregations: count(_col1)
keys: _col0 (type: array<int>)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
Expand Down

0 comments on commit ef2d276

Please sign in to comment.