diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index ac7c66182d9e4..3bb6734aaddef 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -3225,6 +3225,9 @@ export class BaseQuery { not: 'NOT ({{ expr }})', true: 'TRUE', false: 'FALSE', + like: '{{ expr }} {% if negated %}NOT {% endif %}LIKE {{ pattern }}', + ilike: '{{ expr }} {% if negated %}NOT {% endif %}ILIKE {{ pattern }}', + like_escape: '{{ like_expr }} ESCAPE {{ escape_char }}', }, quotes: { identifiers: '"', diff --git a/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts b/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts index 9f5b83e3df3cd..7c34a4a0d8e9a 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts @@ -249,6 +249,8 @@ export class BigqueryQuery extends BaseQuery { templates.expressions.interval = 'INTERVAL {{ interval }}'; templates.expressions.extract = 'EXTRACT({% if date_part == \'DOW\' %}DAYOFWEEK{% elif date_part == \'DOY\' %}DAYOFYEAR{% else %}{{ date_part }}{% endif %} FROM {{ expr }})'; templates.expressions.timestamp_literal = 'TIMESTAMP(\'{{ value }}\')'; + delete templates.expressions.ilike; + delete templates.expressions.like_escape; templates.types.boolean = 'BOOL'; templates.types.float = 'FLOAT64'; templates.types.double = 'FLOAT64'; diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts index 761ccc9acaae1..ed393b971a453 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts @@ -272,6 +272,7 @@ export class ClickHouseQuery extends BaseQuery { // TODO: Introduce additional filter in jinja? or parseDateTimeBestEffort? // https://github.com/ClickHouse/ClickHouse/issues/19351 templates.expressions.timestamp_literal = 'parseDateTimeBestEffort(\'{{ value }}\')'; + delete templates.expressions.like_escape; templates.quotes.identifiers = '`'; templates.quotes.escape = '\\`'; templates.types.boolean = 'BOOL'; diff --git a/packages/cubejs-schema-compiler/src/adapter/MssqlQuery.ts b/packages/cubejs-schema-compiler/src/adapter/MssqlQuery.ts index 12e7af9091c1b..9b1bae70a04a9 100644 --- a/packages/cubejs-schema-compiler/src/adapter/MssqlQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/MssqlQuery.ts @@ -223,6 +223,7 @@ export class MssqlQuery extends BaseQuery { const templates = super.sqlTemplates(); templates.functions.LEAST = 'LEAST({{ args_concat }})'; templates.functions.GREATEST = 'GREATEST({{ args_concat }})'; + delete templates.expressions.ilike; templates.types.string = 'VARCHAR'; templates.types.boolean = 'BIT'; templates.types.integer = 'INT'; diff --git a/packages/cubejs-schema-compiler/src/adapter/MysqlQuery.ts b/packages/cubejs-schema-compiler/src/adapter/MysqlQuery.ts index a2584f2163c82..6a439af332e69 100644 --- a/packages/cubejs-schema-compiler/src/adapter/MysqlQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/MysqlQuery.ts @@ -158,6 +158,7 @@ export class MysqlQuery extends BaseQuery { const templates = super.sqlTemplates(); templates.quotes.identifiers = '`'; templates.quotes.escape = '\\`'; + delete templates.expressions.ilike; templates.types.string = 'VARCHAR'; templates.types.boolean = 'TINYINT'; templates.types.timestamp = 'DATETIME'; diff --git a/packages/cubejs-schema-compiler/src/adapter/PrestodbQuery.ts b/packages/cubejs-schema-compiler/src/adapter/PrestodbQuery.ts index 22be0221a1e9e..6852f94e52f0a 100644 --- a/packages/cubejs-schema-compiler/src/adapter/PrestodbQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/PrestodbQuery.ts @@ -121,6 +121,7 @@ export class PrestodbQuery extends BaseQuery { templates.expressions.extract = 'EXTRACT({{ date_part }} FROM {{ expr }})'; templates.expressions.interval_single_date_part = 'INTERVAL \'{{ num }}\' {{ date_part }}'; templates.expressions.timestamp_literal = 'from_iso8601_timestamp(\'{{ value }}\')'; + delete templates.expressions.ilike; templates.types.string = 'VARCHAR'; templates.types.float = 'REAL'; // Presto intervals have a YearMonth or DayTime type variants, but no universal type diff --git a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs index 73123c38d7ced..b66fa831cab02 100644 --- a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs @@ -7,7 +7,7 @@ use crate::{ filters::Decimal, utils::{DecomposedDayTime, DecomposedMonthDayNano}, }, - WrappedSelectType, + LikeType, WrappedSelectType, }, }, config::ConfigObj, @@ -1285,8 +1285,96 @@ impl CubeScanWrapperNode { Ok((resulting_sql, sql_query)) } // Expr::AnyExpr { .. } => {} - // Expr::Like(_) => {}-= - // Expr::ILike(_) => {} + Expr::Like(like) => { + let (expr, sql_query) = Self::generate_sql_for_expr( + plan.clone(), + sql_query, + sql_generator.clone(), + *like.expr, + ungrouped_scan_node.clone(), + subqueries.clone(), + ) + .await?; + let (pattern, sql_query) = Self::generate_sql_for_expr( + plan.clone(), + sql_query, + sql_generator.clone(), + *like.pattern, + ungrouped_scan_node.clone(), + subqueries.clone(), + ) + .await?; + let (escape_char, sql_query) = match like.escape_char { + Some(escape_char) => { + let (escape_char, sql_query) = Self::generate_sql_for_expr( + plan.clone(), + sql_query, + sql_generator.clone(), + Expr::Literal(ScalarValue::Utf8(Some(escape_char.to_string()))), + ungrouped_scan_node.clone(), + subqueries.clone(), + ) + .await?; + (Some(escape_char), sql_query) + } + None => (None, sql_query), + }; + let resulting_sql = sql_generator + .get_sql_templates() + .like_expr(LikeType::Like, expr, like.negated, pattern, escape_char) + .map_err(|e| { + DataFusionError::Internal(format!( + "Can't generate SQL for like expr: {}", + e + )) + })?; + Ok((resulting_sql, sql_query)) + } + Expr::ILike(ilike) => { + let (expr, sql_query) = Self::generate_sql_for_expr( + plan.clone(), + sql_query, + sql_generator.clone(), + *ilike.expr, + ungrouped_scan_node.clone(), + subqueries.clone(), + ) + .await?; + let (pattern, sql_query) = Self::generate_sql_for_expr( + plan.clone(), + sql_query, + sql_generator.clone(), + *ilike.pattern, + ungrouped_scan_node.clone(), + subqueries.clone(), + ) + .await?; + let (escape_char, sql_query) = match ilike.escape_char { + Some(escape_char) => { + let (escape_char, sql_query) = Self::generate_sql_for_expr( + plan.clone(), + sql_query, + sql_generator.clone(), + Expr::Literal(ScalarValue::Utf8(Some(escape_char.to_string()))), + ungrouped_scan_node.clone(), + subqueries.clone(), + ) + .await?; + (Some(escape_char), sql_query) + } + None => (None, sql_query), + }; + let resulting_sql = sql_generator + .get_sql_templates() + .like_expr(LikeType::ILike, expr, ilike.negated, pattern, escape_char) + .map_err(|e| { + DataFusionError::Internal(format!( + "Can't generate SQL for ilike expr: {}", + e + )) + })?; + Ok((resulting_sql, sql_query)) + } // Expr::SimilarTo(_) => {} Expr::Not(expr) => { let (expr, sql_query) = Self::generate_sql_for_expr( diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 4824a8957a5b9..d74f2c30625a3 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -18440,4 +18440,56 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), Ok(()) } + + #[tokio::test] + async fn test_thoughtspot_like_escape_push_down() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let query_plan = convert_select_to_query_plan( + r#" + SELECT CAST("customer_gender" AS TEXT) AS "customer_gender" + FROM "public"."KibanaSampleDataEcommerce" + WHERE + "customer_gender" LIKE ( + '%' || replace( + replace( + replace( + 'ale', + '!', + '!!' + ), + '%', + '!%' + ), + '_', + '!_' + ) || '%' + ) ESCAPE '!' + GROUP BY 1 + ORDER BY 1 + LIMIT 100 + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let logical_plan = query_plan.as_logical_plan(); + let sql = logical_plan + .find_cube_scan_wrapper() + .wrapped_sql + .unwrap() + .sql; + assert!(sql.contains("LIKE ")); + assert!(sql.contains("ESCAPE ")); + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + } } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/like_expr.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/like_expr.rs new file mode 100644 index 0000000000000..64f89af867070 --- /dev/null +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/like_expr.rs @@ -0,0 +1,136 @@ +use crate::{ + compile::rewrite::{ + analysis::LogicalPlanAnalysis, like_expr, rewrite, rules::wrapper::WrapperRules, + transforming_rewrite, wrapper_pullup_replacer, wrapper_pushdown_replacer, + LikeExprEscapeChar, LikeExprLikeType, LikeType, LogicalPlanLanguage, + WrapperPullupReplacerAliasToCube, + }, + var, var_iter, +}; +use egg::{EGraph, Rewrite, Subst}; + +impl WrapperRules { + pub fn like_expr_rules( + &self, + rules: &mut Vec>, + ) { + rules.extend(vec![ + rewrite( + "wrapper-push-down-like-expr", + wrapper_pushdown_replacer( + like_expr( + "?like_type", + "?negated", + "?expr", + "?pattern", + "?escape_char", + ), + "?alias_to_cube", + "?ungrouped", + "?in_projection", + "?cube_members", + ), + like_expr( + "?like_type", + "?negated", + wrapper_pushdown_replacer( + "?expr", + "?alias_to_cube", + "?ungrouped", + "?in_projection", + "?cube_members", + ), + wrapper_pushdown_replacer( + "?pattern", + "?alias_to_cube", + "?ungrouped", + "?in_projection", + "?cube_members", + ), + "?escape_char", + ), + ), + transforming_rewrite( + "wrapper-pull-up-like-expr", + like_expr( + "?like_type", + "?negated", + wrapper_pullup_replacer( + "?expr", + "?alias_to_cube", + "?ungrouped", + "?in_projection", + "?cube_members", + ), + wrapper_pullup_replacer( + "?pattern", + "?alias_to_cube", + "?ungrouped", + "?in_projection", + "?cube_members", + ), + "?escape_char", + ), + wrapper_pullup_replacer( + like_expr( + "?like_type", + "?negated", + "?expr", + "?pattern", + "?escape_char", + ), + "?alias_to_cube", + "?ungrouped", + "?in_projection", + "?cube_members", + ), + self.transform_like_expr("?alias_to_cube", "?like_type", "?escape_char"), + ), + ]); + } + + fn transform_like_expr( + &self, + alias_to_cube_var: &'static str, + like_type_var: &'static str, + escape_char_var: &'static str, + ) -> impl Fn(&mut EGraph, &mut Subst) -> bool { + let alias_to_cube_var = var!(alias_to_cube_var); + let like_type_var = var!(like_type_var); + let escape_char_var = var!(escape_char_var); + let meta = self.meta_context.clone(); + move |egraph, subst| { + for alias_to_cube in var_iter!( + egraph[subst[alias_to_cube_var]], + WrapperPullupReplacerAliasToCube + ) { + let Some(sql_generator) = meta.sql_generator_by_alias_to_cube(&alias_to_cube) + else { + continue; + }; + + let templates = &sql_generator.get_sql_templates().templates; + + for escape_char in var_iter!(egraph[subst[escape_char_var]], LikeExprEscapeChar) { + if escape_char.is_some() { + if !templates.contains_key("expressions/like_escape") { + continue; + } + } + + for like_type in var_iter!(egraph[subst[like_type_var]], LikeExprLikeType) { + let expression_name = match like_type { + LikeType::Like => "like", + LikeType::ILike => "ilike", + _ => continue, + }; + if templates.contains_key(&format!("expressions/{}", expression_name)) { + return true; + } + } + } + } + false + } + } +} diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs index 89b650719e66b..54f6f26933fe2 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs @@ -12,6 +12,7 @@ mod filter; mod in_list_expr; mod in_subquery_expr; mod is_null_expr; +mod like_expr; mod limit; mod literal; mod negative_expr; @@ -82,6 +83,7 @@ impl RewriteRules for WrapperRules { self.negative_expr_rules(&mut rules); self.not_expr_rules(&mut rules); self.distinct_rules(&mut rules); + self.like_expr_rules(&mut rules); rules } diff --git a/rust/cubesql/cubesql/src/compile/test/mod.rs b/rust/cubesql/cubesql/src/compile/test/mod.rs index 1d33e4aa29c4a..23dee919bf52a 100644 --- a/rust/cubesql/cubesql/src/compile/test/mod.rs +++ b/rust/cubesql/cubesql/src/compile/test/mod.rs @@ -532,6 +532,9 @@ OFFSET {{ offset }}{% endif %}"#.to_string(), ("expressions/true".to_string(), "TRUE".to_string()), ("expressions/false".to_string(), "FALSE".to_string()), ("expressions/timestamp_literal".to_string(), "timestamptz '{{ value }}'".to_string()), + ("expressions/like".to_string(), "{{ expr }} {% if negated %}NOT {% endif %}LIKE {{ pattern }}".to_string()), + ("expressions/ilike".to_string(), "{{ expr }} {% if negated %}NOT {% endif %}ILIKE {{ pattern }}".to_string()), + ("expressions/like_escape".to_string(), "{{ like_expr }} ESCAPE {{ escape_char }}".to_string()), ("quotes/identifiers".to_string(), "\"".to_string()), ("quotes/escape".to_string(), "\"\"".to_string()), ("params/param".to_string(), "${{ param_index + 1 }}".to_string()), diff --git a/rust/cubesql/cubesql/src/transport/service.rs b/rust/cubesql/cubesql/src/transport/service.rs index e1561d074b261..df0b384600594 100644 --- a/rust/cubesql/cubesql/src/transport/service.rs +++ b/rust/cubesql/cubesql/src/transport/service.rs @@ -26,9 +26,12 @@ use tokio::{ use uuid::Uuid; use crate::{ - compile::engine::df::{ - scan::MemberField, - wrapper::{GroupingSetDesc, GroupingSetType, SqlQuery}, + compile::{ + engine::df::{ + scan::MemberField, + wrapper::{GroupingSetDesc, GroupingSetType, SqlQuery}, + }, + rewrite::LikeType, }, sql::{AuthContextRef, HttpAuthContext}, transport::{ @@ -832,6 +835,39 @@ impl SqlTemplates { self.render_template("expressions/timestamp_literal", context! { value => value }) } + pub fn like_expr( + &self, + like_type: LikeType, + expr: String, + negated: bool, + pattern: String, + escape_char: Option, + ) -> Result { + let expression_name = match like_type { + LikeType::Like => "like", + LikeType::ILike => "ilike", + _ => { + return Err(CubeError::internal(format!( + "Error rendering template: like type {} is not supported", + like_type + ))) + } + }; + + let rendered_like = self.render_template( + &format!("expressions/{}", expression_name), + context! { expr => expr, negated => negated, pattern => pattern }, + )?; + + let Some(escape_char) = escape_char else { + return Ok(rendered_like); + }; + self.render_template( + "expressions/like_escape", + context! { like_expr => rendered_like, escape_char => escape_char }, + ) + } + pub fn param(&self, param_index: usize) -> Result { self.render_template("params/param", context! { param_index => param_index }) }