From abb4c85362368348e5cf848f6f1017de94bff48a Mon Sep 17 00:00:00 2001 From: Jim Crist-Harif Date: Tue, 24 Sep 2024 11:18:01 -0500 Subject: [PATCH] feat(bigquery, impala, mssql, oracle, postgres): compile `Table.sample` to native `TABLESAMPLE` syntax when possible --- .../sql/compilers/bigquery/__init__.py | 9 +++++++++ ibis/backends/sql/compilers/druid.py | 1 + ibis/backends/sql/compilers/impala.py | 8 +++++++- ibis/backends/sql/compilers/mssql.py | 7 +++++++ ibis/backends/sql/compilers/oracle.py | 2 ++ ibis/backends/sql/compilers/postgres.py | 4 +++- ibis/backends/sql/compilers/risingwave.py | 1 + ibis/backends/sql/dialects.py | 12 ++++++++++++ .../test_sample/bigquery-False/block.sql | 3 +++ .../test_sample/bigquery-False/row.sql | 5 +++++ .../test_sample/bigquery-True/block.sql | 11 +++++++++++ .../test_sql/test_sample/bigquery-True/row.sql | 11 +++++++++++ .../test_sample/clickhouse-False/block.sql | 5 +++++ .../test_sample/clickhouse-False/row.sql | 5 +++++ .../test_sample/clickhouse-True/block.sql | 11 +++++++++++ .../test_sample/clickhouse-True/row.sql | 11 +++++++++++ .../test_sample/datafusion-False/block.sql | 5 +++++ .../test_sample/datafusion-False/row.sql | 5 +++++ .../test_sample/datafusion-True/block.sql | 11 +++++++++++ .../test_sample/datafusion-True/row.sql | 11 +++++++++++ .../test_sample/duckdb-False/block.sql | 3 +++ .../test_sql/test_sample/duckdb-False/row.sql | 3 +++ .../test_sql/test_sample/duckdb-True/block.sql | 9 +++++++++ .../test_sql/test_sample/duckdb-True/row.sql | 9 +++++++++ .../test_sample/exasol-False/block.sql | 5 +++++ .../test_sql/test_sample/exasol-False/row.sql | 5 +++++ .../test_sql/test_sample/exasol-True/block.sql | 11 +++++++++++ .../test_sql/test_sample/exasol-True/row.sql | 11 +++++++++++ .../test_sql/test_sample/flink-False/block.sql | 5 +++++ .../test_sql/test_sample/flink-False/row.sql | 5 +++++ .../test_sql/test_sample/flink-True/block.sql | 11 +++++++++++ .../test_sql/test_sample/flink-True/row.sql | 11 +++++++++++ .../test_sample/impala-False/block.sql | 3 +++ .../test_sql/test_sample/impala-False/row.sql | 5 +++++ .../test_sql/test_sample/impala-True/block.sql | 11 +++++++++++ .../test_sql/test_sample/impala-True/row.sql | 11 +++++++++++ .../test_sql/test_sample/mssql-False/block.sql | 3 +++ .../test_sql/test_sample/mssql-False/row.sql | 6 ++++++ .../test_sql/test_sample/mssql-True/block.sql | 13 +++++++++++++ .../test_sql/test_sample/mssql-True/row.sql | 13 +++++++++++++ .../test_sql/test_sample/mysql-False/block.sql | 5 +++++ .../test_sql/test_sample/mysql-False/row.sql | 5 +++++ .../test_sql/test_sample/mysql-True/block.sql | 11 +++++++++++ .../test_sql/test_sample/mysql-True/row.sql | 11 +++++++++++ .../test_sample/oracle-False/block.sql | 3 +++ .../test_sql/test_sample/oracle-False/row.sql | 3 +++ .../test_sql/test_sample/oracle-True/block.sql | 11 +++++++++++ .../test_sql/test_sample/oracle-True/row.sql | 11 +++++++++++ .../test_sample/postgres-False/block.sql | 3 +++ .../test_sample/postgres-False/row.sql | 3 +++ .../test_sample/postgres-True/block.sql | 11 +++++++++++ .../test_sql/test_sample/postgres-True/row.sql | 11 +++++++++++ .../test_sample/pyspark-False/block.sql | 3 +++ .../test_sql/test_sample/pyspark-False/row.sql | 3 +++ .../test_sample/pyspark-True/block.sql | 9 +++++++++ .../test_sql/test_sample/pyspark-True/row.sql | 9 +++++++++ .../test_sample/snowflake-False/block.sql | 3 +++ .../test_sample/snowflake-False/row.sql | 3 +++ .../test_sample/snowflake-True/block.sql | 9 +++++++++ .../test_sample/snowflake-True/row.sql | 9 +++++++++ .../test_sample/sqlite-False/block.sql | 9 +++++++++ .../test_sql/test_sample/sqlite-False/row.sql | 9 +++++++++ .../test_sql/test_sample/sqlite-True/block.sql | 15 +++++++++++++++ .../test_sql/test_sample/sqlite-True/row.sql | 15 +++++++++++++++ .../test_sql/test_sample/trino-False/block.sql | 3 +++ .../test_sql/test_sample/trino-False/row.sql | 3 +++ .../test_sql/test_sample/trino-True/block.sql | 9 +++++++++ .../test_sql/test_sample/trino-True/row.sql | 9 +++++++++ ibis/backends/tests/test_generic.py | 1 - ibis/backends/tests/test_sql.py | 18 ++++++++++++++++++ ibis/expr/types/relations.py | 13 +++++++------ 71 files changed, 522 insertions(+), 9 deletions(-) create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/exasol-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/exasol-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/exasol-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/exasol-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/flink-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/flink-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/flink-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/flink-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/impala-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/impala-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/impala-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/impala-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/mssql-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/mssql-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/mssql-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/mssql-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/mysql-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/mysql-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/mysql-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/mysql-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/oracle-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/oracle-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/oracle-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/oracle-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/postgres-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/postgres-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/postgres-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/postgres-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-True/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/trino-False/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/trino-False/row.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/trino-True/block.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_sample/trino-True/row.sql diff --git a/ibis/backends/sql/compilers/bigquery/__init__.py b/ibis/backends/sql/compilers/bigquery/__init__.py index 0e8f3a7d30178..50100e790f920 100644 --- a/ibis/backends/sql/compilers/bigquery/__init__.py +++ b/ibis/backends/sql/compilers/bigquery/__init__.py @@ -22,6 +22,7 @@ exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_rank, exclude_unsupported_window_frame_from_row_number, + lower_sample, split_select_distinct_with_order_by, ) from ibis.common.temporal import DateUnit, IntervalUnit, TimestampUnit, TimeUnit @@ -118,6 +119,14 @@ class BigQueryCompiler(SQLGlotCompiler): supports_qualify = True + LOWERED_OPS = { + ops.Sample: lower_sample( + supports_methods=("block",), + supports_seed=False, + physical_tables_only=True, + ), + } + UNSUPPORTED_OPS = ( ops.DateDiff, ops.ExtractAuthority, diff --git a/ibis/backends/sql/compilers/druid.py b/ibis/backends/sql/compilers/druid.py index 11a5bd536f62b..4e2710b399925 100644 --- a/ibis/backends/sql/compilers/druid.py +++ b/ibis/backends/sql/compilers/druid.py @@ -65,6 +65,7 @@ class DruidCompiler(SQLGlotCompiler): ops.TypeOf, ops.Unnest, ops.Variance, + ops.Sample, ) SIMPLE_OPS = { diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index a21d89c3872b7..eed11e2d83e1a 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -10,7 +10,7 @@ from ibis.backends.sql.compilers.base import NULL, STAR, SQLGlotCompiler from ibis.backends.sql.datatypes import ImpalaType from ibis.backends.sql.dialects import Impala -from ibis.backends.sql.rewrites import rewrite_empty_order_by_window +from ibis.backends.sql.rewrites import lower_sample, rewrite_empty_order_by_window class ImpalaCompiler(SQLGlotCompiler): @@ -23,6 +23,12 @@ class ImpalaCompiler(SQLGlotCompiler): *SQLGlotCompiler.rewrites, ) + LOWERED_OPS = { + ops.Sample: lower_sample( + supports_methods=("block",), physical_tables_only=True + ), + } + UNSUPPORTED_OPS = ( ops.ArgMax, ops.ArgMin, diff --git a/ibis/backends/sql/compilers/mssql.py b/ibis/backends/sql/compilers/mssql.py index dbb0e3f9fe2c3..195c7f9c1056f 100644 --- a/ibis/backends/sql/compilers/mssql.py +++ b/ibis/backends/sql/compilers/mssql.py @@ -22,6 +22,7 @@ from ibis.backends.sql.rewrites import ( exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_row_number, + lower_sample, p, replace, split_select_distinct_with_order_by, @@ -73,6 +74,12 @@ class MSSQLCompiler(SQLGlotCompiler): post_rewrites = (split_select_distinct_with_order_by,) copy_func_args = True + LOWERED_OPS = { + ops.Sample: lower_sample( + supports_methods=("block",), physical_tables_only=True + ), + } + UNSUPPORTED_OPS = ( ops.ApproxMedian, ops.ArgMax, diff --git a/ibis/backends/sql/compilers/oracle.py b/ibis/backends/sql/compilers/oracle.py index faee258b10e46..737a7515001af 100644 --- a/ibis/backends/sql/compilers/oracle.py +++ b/ibis/backends/sql/compilers/oracle.py @@ -16,6 +16,7 @@ exclude_unsupported_window_frame_from_row_number, lower_log2, lower_log10, + lower_sample, rewrite_empty_order_by_window, ) @@ -46,6 +47,7 @@ class OracleCompiler(SQLGlotCompiler): LOWERED_OPS = { ops.Log2: lower_log2, ops.Log10: lower_log10, + ops.Sample: lower_sample(physical_tables_only=True), } UNSUPPORTED_OPS = ( diff --git a/ibis/backends/sql/compilers/postgres.py b/ibis/backends/sql/compilers/postgres.py index 7197d6fd03df9..ec9f4d3740663 100644 --- a/ibis/backends/sql/compilers/postgres.py +++ b/ibis/backends/sql/compilers/postgres.py @@ -17,7 +17,7 @@ from ibis.backends.sql.compilers.base import NULL, STAR, AggGen, SQLGlotCompiler from ibis.backends.sql.datatypes import PostgresType from ibis.backends.sql.dialects import Postgres -from ibis.backends.sql.rewrites import split_select_distinct_with_order_by +from ibis.backends.sql.rewrites import lower_sample, split_select_distinct_with_order_by from ibis.common.exceptions import InvalidDecoratorError from ibis.util import gen_name @@ -50,6 +50,8 @@ class PostgresCompiler(SQLGlotCompiler): POS_INF = sge.Literal.number("'Inf'::double precision") NEG_INF = sge.Literal.number("'-Inf'::double precision") + LOWERED_OPS = {ops.Sample: lower_sample(physical_tables_only=True)} + UNSUPPORTED_OPS = ( ops.RowID, ops.TimeDelta, diff --git a/ibis/backends/sql/compilers/risingwave.py b/ibis/backends/sql/compilers/risingwave.py index 73f013eeb96ab..c4baf94d6723f 100644 --- a/ibis/backends/sql/compilers/risingwave.py +++ b/ibis/backends/sql/compilers/risingwave.py @@ -26,6 +26,7 @@ class RisingWaveCompiler(PostgresCompiler): ops.RandomUUID, ops.MultiQuantile, ops.ApproxMultiQuantile, + ops.Sample, *( op for op in ALL_OPERATIONS diff --git a/ibis/backends/sql/dialects.py b/ibis/backends/sql/dialects.py index f7a2eb38dca90..8f62c3c57f599 100644 --- a/ibis/backends/sql/dialects.py +++ b/ibis/backends/sql/dialects.py @@ -307,11 +307,22 @@ class Tokenizer(Hive.Tokenizer): STRING_ESCAPES = ["'"] +def tablesample_percent_to_int(self, expr): + """Impala's TABLESAMPLE only supports integer percentages.""" + expr = expr.copy() + expr.args["percent"] = sge.convert(round(float(expr.args["percent"].this))) + return self.tablesample_sql(expr) + + class Impala(Hive): NULL_ORDERING = "nulls_are_large" REGEXP_EXTRACT_DEFAULT_GROUP = 0 + TABLESAMPLE_SIZE_IS_PERCENT = True + ALIAS_POST_TABLESAMPLE = False class Generator(Hive.Generator): + TABLESAMPLE_WITH_METHOD = True + TRANSFORMS = Hive.Generator.TRANSFORMS.copy() | { sge.ApproxDistinct: rename_func("ndv"), sge.IsNan: rename_func("is_nan"), @@ -319,6 +330,7 @@ class Generator(Hive.Generator): sge.DayOfWeek: rename_func("dayofweek"), sge.Interval: lambda self, e: _interval(self, e, quote_arg=False), sge.CurrentDate: rename_func("current_date"), + sge.TableSample: tablesample_percent_to_int, } diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-False/block.sql new file mode 100644 index 0000000000000..0f6cc00d35ee3 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-False/block.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `test` AS `t0` TABLESAMPLE system (50.0 PERCENT) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-False/row.sql new file mode 100644 index 0000000000000..41fafb2da62dd --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-False/row.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM `test` AS `t0` +WHERE + RAND() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-True/block.sql new file mode 100644 index 0000000000000..0e8e7838e323b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-True/block.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) AS `t1` +WHERE + RAND() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-True/row.sql new file mode 100644 index 0000000000000..0e8e7838e323b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/bigquery-True/row.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) AS `t1` +WHERE + RAND() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-False/block.sql new file mode 100644 index 0000000000000..93ab234de3b5e --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-False/block.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM "test" AS "t0" +WHERE + randCanonical() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-False/row.sql new file mode 100644 index 0000000000000..93ab234de3b5e --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-False/row.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM "test" AS "t0" +WHERE + randCanonical() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-True/block.sql new file mode 100644 index 0000000000000..7f4590759a56a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-True/block.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" +WHERE + randCanonical() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-True/row.sql new file mode 100644 index 0000000000000..7f4590759a56a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/clickhouse-True/row.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" +WHERE + randCanonical() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-False/block.sql new file mode 100644 index 0000000000000..f885113afef8f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-False/block.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM "test" AS "t0" +WHERE + RANDOM() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-False/row.sql new file mode 100644 index 0000000000000..f885113afef8f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-False/row.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM "test" AS "t0" +WHERE + RANDOM() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-True/block.sql new file mode 100644 index 0000000000000..0307d641ffea5 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-True/block.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" +WHERE + RANDOM() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-True/row.sql new file mode 100644 index 0000000000000..0307d641ffea5 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/datafusion-True/row.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" +WHERE + RANDOM() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-False/block.sql new file mode 100644 index 0000000000000..95f441e596113 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-False/block.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM "test" AS "t0" TABLESAMPLE system (50.0 PERCENT) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-False/row.sql new file mode 100644 index 0000000000000..e22aab901495d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-False/row.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM "test" AS "t0" TABLESAMPLE bernoulli (50.0 PERCENT) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-True/block.sql new file mode 100644 index 0000000000000..5b495ba2d0f19 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-True/block.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" TABLESAMPLE system (50.0 PERCENT) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-True/row.sql new file mode 100644 index 0000000000000..0542a60cfdeca --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/duckdb-True/row.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" TABLESAMPLE bernoulli (50.0 PERCENT) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-False/block.sql new file mode 100644 index 0000000000000..f885113afef8f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-False/block.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM "test" AS "t0" +WHERE + RANDOM() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-False/row.sql new file mode 100644 index 0000000000000..f885113afef8f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-False/row.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM "test" AS "t0" +WHERE + RANDOM() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-True/block.sql new file mode 100644 index 0000000000000..0307d641ffea5 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-True/block.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" +WHERE + RANDOM() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-True/row.sql new file mode 100644 index 0000000000000..0307d641ffea5 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/exasol-True/row.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" +WHERE + RANDOM() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/flink-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/flink-False/block.sql new file mode 100644 index 0000000000000..41fafb2da62dd --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/flink-False/block.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM `test` AS `t0` +WHERE + RAND() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/flink-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/flink-False/row.sql new file mode 100644 index 0000000000000..41fafb2da62dd --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/flink-False/row.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM `test` AS `t0` +WHERE + RAND() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/flink-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/flink-True/block.sql new file mode 100644 index 0000000000000..0e8e7838e323b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/flink-True/block.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) AS `t1` +WHERE + RAND() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/flink-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/flink-True/row.sql new file mode 100644 index 0000000000000..0e8e7838e323b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/flink-True/row.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) AS `t1` +WHERE + RAND() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/impala-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/impala-False/block.sql new file mode 100644 index 0000000000000..83bd5d40d36dc --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/impala-False/block.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `test` AS `t0` TABLESAMPLE system (50) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/impala-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/impala-False/row.sql new file mode 100644 index 0000000000000..48e9d653b3702 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/impala-False/row.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM `test` AS `t0` +WHERE + RAND(UTC_TO_UNIX_MICROS(UTC_TIMESTAMP())) <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/impala-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/impala-True/block.sql new file mode 100644 index 0000000000000..5b84da7919ecb --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/impala-True/block.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) AS `t1` +WHERE + RAND(UTC_TO_UNIX_MICROS(UTC_TIMESTAMP())) <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/impala-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/impala-True/row.sql new file mode 100644 index 0000000000000..5b84da7919ecb --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/impala-True/row.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) AS `t1` +WHERE + RAND(UTC_TO_UNIX_MICROS(UTC_TIMESTAMP())) <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-False/block.sql new file mode 100644 index 0000000000000..5427966160cd8 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-False/block.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM [test] AS [t0] TABLESAMPLE system (50.0 PERCENT) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-False/row.sql new file mode 100644 index 0000000000000..1624a907034f8 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-False/row.sql @@ -0,0 +1,6 @@ +SELECT + [t0].[x], + [t0].[y] +FROM [test] AS [t0] +WHERE + RAND(CHECKSUM(NEWID())) <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-True/block.sql new file mode 100644 index 0000000000000..a8b9fc1804149 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-True/block.sql @@ -0,0 +1,13 @@ +SELECT + [t1].[x], + [t1].[y] +FROM ( + SELECT + [t0].[x], + [t0].[y] + FROM [test] AS [t0] + WHERE + [t0].[x] > 10 +) AS [t1] +WHERE + RAND(CHECKSUM(NEWID())) <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-True/row.sql new file mode 100644 index 0000000000000..a8b9fc1804149 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/mssql-True/row.sql @@ -0,0 +1,13 @@ +SELECT + [t1].[x], + [t1].[y] +FROM ( + SELECT + [t0].[x], + [t0].[y] + FROM [test] AS [t0] + WHERE + [t0].[x] > 10 +) AS [t1] +WHERE + RAND(CHECKSUM(NEWID())) <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-False/block.sql new file mode 100644 index 0000000000000..41fafb2da62dd --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-False/block.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM `test` AS `t0` +WHERE + RAND() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-False/row.sql new file mode 100644 index 0000000000000..41fafb2da62dd --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-False/row.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM `test` AS `t0` +WHERE + RAND() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-True/block.sql new file mode 100644 index 0000000000000..0e8e7838e323b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-True/block.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) AS `t1` +WHERE + RAND() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-True/row.sql new file mode 100644 index 0000000000000..0e8e7838e323b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/mysql-True/row.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) AS `t1` +WHERE + RAND() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-False/block.sql new file mode 100644 index 0000000000000..36f81d7681a5f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-False/block.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM "test" SAMPLE system (50.0) "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-False/row.sql new file mode 100644 index 0000000000000..860e3a6803388 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-False/row.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM "test" SAMPLE bernoulli (50.0) "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-True/block.sql new file mode 100644 index 0000000000000..7e85433149478 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-True/block.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" "t0" + WHERE + "t0"."x" > 10 +) "t1" +WHERE + DBMS_RANDOM.VALUE() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-True/row.sql new file mode 100644 index 0000000000000..7e85433149478 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/oracle-True/row.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" "t0" + WHERE + "t0"."x" > 10 +) "t1" +WHERE + DBMS_RANDOM.VALUE() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-False/block.sql new file mode 100644 index 0000000000000..27b5ab3d50468 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-False/block.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM "test" AS "t0" TABLESAMPLE system (50.0) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-False/row.sql new file mode 100644 index 0000000000000..72f6ef9781243 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-False/row.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM "test" AS "t0" TABLESAMPLE bernoulli (50.0) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-True/block.sql new file mode 100644 index 0000000000000..0307d641ffea5 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-True/block.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" +WHERE + RANDOM() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-True/row.sql new file mode 100644 index 0000000000000..0307d641ffea5 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/postgres-True/row.sql @@ -0,0 +1,11 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" +WHERE + RANDOM() <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-False/block.sql new file mode 100644 index 0000000000000..c9099652a9141 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-False/block.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `test` TABLESAMPLE (50.0 PERCENT) AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-False/row.sql new file mode 100644 index 0000000000000..c9099652a9141 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-False/row.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `test` TABLESAMPLE (50.0 PERCENT) AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-True/block.sql new file mode 100644 index 0000000000000..27955a82792f5 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-True/block.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) TABLESAMPLE (50.0 PERCENT) AS `t1` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-True/row.sql new file mode 100644 index 0000000000000..27955a82792f5 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/pyspark-True/row.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM ( + SELECT + * + FROM `test` AS `t0` + WHERE + `t0`.`x` > 10 +) TABLESAMPLE (50.0 PERCENT) AS `t1` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-False/block.sql new file mode 100644 index 0000000000000..27b5ab3d50468 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-False/block.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM "test" AS "t0" TABLESAMPLE system (50.0) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-False/row.sql new file mode 100644 index 0000000000000..72f6ef9781243 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-False/row.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM "test" AS "t0" TABLESAMPLE bernoulli (50.0) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-True/block.sql new file mode 100644 index 0000000000000..2c9987d23ddf3 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-True/block.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" TABLESAMPLE system (50.0) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-True/row.sql new file mode 100644 index 0000000000000..38eb636312777 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-True/row.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" TABLESAMPLE bernoulli (50.0) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-False/block.sql new file mode 100644 index 0000000000000..929dbdeaf4648 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-False/block.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM "test" AS "t0" +WHERE + ( + 0.5 + ( + CAST(RANDOM() AS REAL) / -1.8446744073709552e+19 + ) + ) <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-False/row.sql new file mode 100644 index 0000000000000..929dbdeaf4648 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-False/row.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM "test" AS "t0" +WHERE + ( + 0.5 + ( + CAST(RANDOM() AS REAL) / -1.8446744073709552e+19 + ) + ) <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-True/block.sql new file mode 100644 index 0000000000000..d7cac89b0e389 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-True/block.sql @@ -0,0 +1,15 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" +WHERE + ( + 0.5 + ( + CAST(RANDOM() AS REAL) / -1.8446744073709552e+19 + ) + ) <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-True/row.sql new file mode 100644 index 0000000000000..d7cac89b0e389 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/sqlite-True/row.sql @@ -0,0 +1,15 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" +WHERE + ( + 0.5 + ( + CAST(RANDOM() AS REAL) / -1.8446744073709552e+19 + ) + ) <= 0.5 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/trino-False/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/trino-False/block.sql new file mode 100644 index 0000000000000..27b5ab3d50468 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/trino-False/block.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM "test" AS "t0" TABLESAMPLE system (50.0) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/trino-False/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/trino-False/row.sql new file mode 100644 index 0000000000000..72f6ef9781243 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/trino-False/row.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM "test" AS "t0" TABLESAMPLE bernoulli (50.0) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/trino-True/block.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/trino-True/block.sql new file mode 100644 index 0000000000000..2c9987d23ddf3 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/trino-True/block.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" TABLESAMPLE system (50.0) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_sample/trino-True/row.sql b/ibis/backends/tests/snapshots/test_sql/test_sample/trino-True/row.sql new file mode 100644 index 0000000000000..38eb636312777 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_sample/trino-True/row.sql @@ -0,0 +1,9 @@ +SELECT + * +FROM ( + SELECT + * + FROM "test" AS "t0" + WHERE + "t0"."x" > 10 +) AS "t1" TABLESAMPLE bernoulli (50.0) \ No newline at end of file diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 71c5b2be0ef62..af8e3017c8898 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -2176,7 +2176,6 @@ def test_sample_memtable(con, backend): "mysql", "oracle", "polars", - "postgres", "risingwave", "sqlite", "trino", diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index fe30396c762ee..957eae518ff9f 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -230,3 +230,21 @@ def test_rewrite_context(snapshot, backend_name): expr = table.select(new_col=ibis.ntile(2).over(order_by=ibis.random())).limit(10) result = ibis.to_sql(expr, dialect=backend_name) snapshot.assert_match(result, "out.sql") + + +@pytest.mark.parametrize("subquery", [False, True]) +@pytest.mark.parametrize("backend_name", _get_backends_to_test()) +@pytest.mark.notimpl(["polars"], raises=ValueError, reason="not a SQL backend") +@pytest.mark.notimpl( + ["druid", "risingwave"], + raises=exc.OperationNotDefinedError, + reason="sample not supported", +) +def test_sample(backend_name, snapshot, subquery): + t = ibis.table({"x": "int64", "y": "int64"}, name="test") + if subquery: + t = t.filter(t.x > 10) + block = ibis.to_sql(t.sample(0.5, method="block"), dialect=backend_name) + row = ibis.to_sql(t.sample(0.5, method="row"), dialect=backend_name) + snapshot.assert_match(block, "block.sql") + snapshot.assert_match(row, "row.sql") diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 7e58417609ce8..00450a44e837b 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -1224,12 +1224,13 @@ def sample( method The sampling method to use. The default is "row", which includes each row with a probability of `fraction`. If method is "block", - some backends may instead perform sampling a fraction of blocks of - rows (where "block" is a backend dependent definition). This is - identical to "row" for backends lacking a blockwise sampling - implementation. For those coming from SQL, "row" and "block" - correspond to "bernoulli" and "system" respectively in a - TABLESAMPLE clause. + some backends may instead sample a fraction of blocks of rows + (where "block" is a backend dependent definition), which may be + significantly more efficient (at the cost of a less statistically + random sample). This is identical to "row" for backends lacking a + blockwise sampling implementation. For those coming from SQL, "row" + and "block" correspond to "bernoulli" and "system" respectively in + a TABLESAMPLE clause. seed An optional random seed to use, for repeatable sampling. The range of possible seed values is backend specific (most support at least