From 0e3600fea62269ae7ea457198b9a89440c0f3658 Mon Sep 17 00:00:00 2001 From: Ante Kresic Date: Wed, 11 Dec 2024 15:38:15 +0100 Subject: [PATCH] Always use index for insert conflict resolution During insert conflict resolution, we find the index on the compressed chunk based on the unique constraint on the chunk. However, we can/should always use the index since we have all the index column values in the slot that is being inserted, so we create scan keys until we find a column that doesn't exist in the uncompressed chunk. --- .unreleased/pr_7529 | 1 + tsl/src/compression/compression_dml.h | 2 +- tsl/src/compression/compression_scankey.c | 10 +- tsl/test/expected/compression_conflicts.out | 125 ++++++++++++++------ tsl/test/sql/compression_conflicts.sql | 49 +++++++- 5 files changed, 148 insertions(+), 39 deletions(-) create mode 100644 .unreleased/pr_7529 diff --git a/.unreleased/pr_7529 b/.unreleased/pr_7529 new file mode 100644 index 00000000000..e2083c88192 --- /dev/null +++ b/.unreleased/pr_7529 @@ -0,0 +1 @@ +Fixes: #7529 Always use index for insert conflict resolution on compressed chunks diff --git a/tsl/src/compression/compression_dml.h b/tsl/src/compression/compression_dml.h index e59fef8bc47..71033750de6 100644 --- a/tsl/src/compression/compression_dml.h +++ b/tsl/src/compression/compression_dml.h @@ -36,7 +36,7 @@ ScanKeyData *build_mem_scankeys_from_slot(Oid ht_relid, CompressionSettings *set TupleTableSlot *slot, int *num_scankeys); ScanKeyData *build_index_scankeys(Relation index_rel, List *index_filters, int *num_scankeys); ScanKeyData *build_index_scankeys_using_slot(Oid hypertable_relid, Relation in_rel, - Relation out_rel, Bitmapset *key_columns, + Relation out_rel, Bitmapset *constraint_columns, TupleTableSlot *slot, Relation *result_index_rel, Bitmapset **index_columns, int *num_scan_keys); ScanKeyData *build_heap_scankeys(Oid hypertable_relid, Relation in_rel, Relation out_rel, diff --git a/tsl/src/compression/compression_scankey.c 
b/tsl/src/compression/compression_scankey.c index e8b8acd2b4e..08af702b6dc 100644 --- a/tsl/src/compression/compression_scankey.c +++ b/tsl/src/compression/compression_scankey.c @@ -269,7 +269,7 @@ build_index_scankeys(Relation index_rel, List *index_filters, int *num_scankeys) */ ScanKeyData * build_index_scankeys_using_slot(Oid hypertable_relid, Relation in_rel, Relation out_rel, - Bitmapset *key_columns, TupleTableSlot *slot, + Bitmapset *constraint_columns, TupleTableSlot *slot, Relation *result_index_rel, Bitmapset **index_columns, int *num_scan_keys) { @@ -322,10 +322,12 @@ build_index_scankeys_using_slot(Oid hypertable_relid, Relation in_rel, Relation const NameData *attname = attnumAttName(in_rel, in_attnum); AttrNumber column_attno = get_attnum(out_rel->rd_id, NameStr(*attname)); - /* Make sure we find columns in key columns in order to select the right index */ - if (!bms_is_member(column_attno, key_columns)) + /* Make sure we find columns in key columns in order to select the right index + * We skip over any non-constraint columns + */ + if (!bms_is_member(column_attno, constraint_columns)) { - break; + continue; } bool isnull; diff --git a/tsl/test/expected/compression_conflicts.out b/tsl/test/expected/compression_conflicts.out index f5f1a7e6151..6f05d93e555 100644 --- a/tsl/test/expected/compression_conflicts.out +++ b/tsl/test/expected/compression_conflicts.out @@ -289,7 +289,7 @@ BEGIN; DROP INDEX _timescaledb_internal.compress_hyper_6_6_chunk_device_label__ts_meta_min_1__ts_me_idx; CREATE INDEX covering_index ON _timescaledb_internal.compress_hyper_6_6_chunk (device, _ts_meta_min_1 DESC, _ts_meta_max_1 DESC, label); INSERT INTO comp_conflicts_3 VALUES ('2020-01-01','d1', 'label', 0.1); -INFO: Using index scan with scan keys: index 1, heap 3, memory 1. +INFO: Using index scan with scan keys: index 2, heap 2, memory 1. 
ERROR: duplicate key value violates unique constraint "5_3_comp_conflicts_3_time_device_label_key" ROLLBACK; -- ignore expression index @@ -564,6 +564,74 @@ SELECT count(*) FROM ONLY :CHUNK; 0 (1 row) +-- test conflict handling on compressed hypertables with unique constraints +set timescaledb.debug_compression_path_info to on; +-- test 5: multi-column primary key with partial segmentby coverage +-- we should be using the index scan in every conflict resolution case +CREATE TABLE comp_conflicts_5(time timestamptz NOT NULL, device text, label text DEFAULT 'label', value float, UNIQUE(time, label)); +SELECT table_name FROM create_hypertable('comp_conflicts_5','time'); + table_name +------------------ + comp_conflicts_5 +(1 row) + +ALTER TABLE comp_conflicts_5 SET (timescaledb.compress,timescaledb.compress_segmentby='device, label'); +NOTICE: default order by for hypertable "comp_conflicts_5" is set to ""time" DESC" +-- implicitly create chunk +INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d1', 'label1', 0.1); +INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d2', 'label2', 0.2); +INSERT INTO comp_conflicts_5 VALUES ('2020-01-01',NULL, 'label3', 0.3); +SELECT compress_chunk(c) AS "CHUNK" FROM show_chunks('comp_conflicts_5') c +\gset +INFO: using tuplesort to scan rows from "_hyper_9_9_chunk" for compression +-- after compression no data should be in uncompressed chunk +SELECT count(*) FROM ONLY :CHUNK; + count +------- + 0 +(1 row) + +-- should fail due to multiple entries with same time, device value +\set ON_ERROR_STOP 0 +INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d1', 'label1', 0.1); +INFO: Using index scan with scan keys: index 1, heap 2, memory 1. +ERROR: duplicate key value violates unique constraint "9_5_comp_conflicts_5_time_label_key" +INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d2', 'label2', 0.2); +INFO: Using index scan with scan keys: index 1, heap 2, memory 1. 
+ERROR: duplicate key value violates unique constraint "9_5_comp_conflicts_5_time_label_key" +INSERT INTO comp_conflicts_5 VALUES +('2020-01-01','d1', 'label', 0.1), +('2020-01-01','d2', 'label', 0.2), +('2020-01-01','d3', 'label', 0.3); +INFO: Using index scan with scan keys: index 1, heap 2, memory 1. +INFO: Number of compressed rows fetched from index: 0. Number of compressed rows filtered by heap filters: 0. +INFO: Using index scan with scan keys: index 1, heap 2, memory 1. +INFO: Number of compressed rows fetched from index: 0. Number of compressed rows filtered by heap filters: 0. +ERROR: duplicate key value violates unique constraint "9_5_comp_conflicts_5_time_label_key" +-- should work the same without the index present +BEGIN; + DROP INDEX _timescaledb_internal.compress_hyper_10_10_chunk_device_label__ts_meta_min_1__ts__idx; + INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d1', 'label1', 0.1); +INFO: Using table scan with scan keys: index 0, heap 3, memory 1. +ERROR: duplicate key value violates unique constraint "9_5_comp_conflicts_5_time_label_key" +ROLLBACK; +BEGIN; + DROP INDEX _timescaledb_internal.compress_hyper_10_10_chunk_device_label__ts_meta_min_1__ts__idx; + INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d2', 'label2', 0.2); +INFO: Using table scan with scan keys: index 0, heap 3, memory 1. +ERROR: duplicate key value violates unique constraint "9_5_comp_conflicts_5_time_label_key" +ROLLBACK; +BEGIN; + DROP INDEX _timescaledb_internal.compress_hyper_10_10_chunk_device_label__ts_meta_min_1__ts__idx; + INSERT INTO comp_conflicts_5 VALUES + ('2020-01-01','d1', 'label1', 0.1), + ('2020-01-01','d2', 'label2', 0.2), + ('2020-01-01','d3', 'label3', 0.3); +INFO: Using table scan with scan keys: index 0, heap 3, memory 1. 
+ERROR: duplicate key value violates unique constraint "9_5_comp_conflicts_5_time_label_key" +ROLLBACK; +\set ON_ERROR_STOP 1 +reset timescaledb.debug_compression_path_info; CREATE OR REPLACE VIEW compressed_chunk_info_view AS SELECT h.schema_name AS hypertable_schema, @@ -591,7 +659,7 @@ SELECT * FROM create_hypertable('compressed_ht', 'time', WARNING: column type "character varying" used for "name" does not follow best practices hypertable_id | schema_name | table_name | created ---------------+-------------+---------------+--------- - 9 | public | compressed_ht | t + 11 | public | compressed_ht | t (1 row) -- create chunk 1 @@ -611,14 +679,11 @@ ALTER TABLE compressed_ht SET ( ); NOTICE: default order by for hypertable "compressed_ht" is set to ""time" DESC" SELECT COMPRESS_CHUNK(SHOW_CHUNKS('compressed_ht')); -INFO: using tuplesort to scan rows from "_hyper_9_9_chunk" for compression -INFO: using tuplesort to scan rows from "_hyper_9_10_chunk" for compression -INFO: using tuplesort to scan rows from "_hyper_9_11_chunk" for compression - compress_chunk ------------------------------------------ - _timescaledb_internal._hyper_9_9_chunk - _timescaledb_internal._hyper_9_10_chunk - _timescaledb_internal._hyper_9_11_chunk + compress_chunk +------------------------------------------ + _timescaledb_internal._hyper_11_11_chunk + _timescaledb_internal._hyper_11_12_chunk + _timescaledb_internal._hyper_11_13_chunk (3 rows) -- check compression status @@ -626,11 +691,11 @@ SELECT chunk_status, chunk_name as "CHUNK_NAME" FROM compressed_chunk_info_view WHERE hypertable_name = 'compressed_ht' ORDER BY chunk_name; - chunk_status | CHUNK_NAME ---------------+------------------- - 1 | _hyper_9_10_chunk - 1 | _hyper_9_11_chunk - 1 | _hyper_9_9_chunk + chunk_status | CHUNK_NAME +--------------+-------------------- + 1 | _hyper_11_11_chunk + 1 | _hyper_11_12_chunk + 1 | _hyper_11_13_chunk (3 rows) -- should report 0 row @@ -643,8 +708,6 @@ SELECT COUNT(*) FROM compressed_ht WHERE 
name = 'ON CONFLICT DO UPDATE'; INSERT INTO compressed_ht VALUES ('2017-12-28 01:10:28.192199+05:30', '1', 0.876, 4.123, 'new insert row') ON conflict(sensor_id, time) DO UPDATE SET sensor_id = excluded.sensor_id , name = 'ON CONFLICT DO UPDATE'; -INFO: Using index scan with scan keys: index 1, heap 2, memory 1. -INFO: Number of compressed rows fetched from index: 1. Number of compressed rows filtered by heap filters: 0. -- should report 1 row SELECT COUNT(*) FROM compressed_ht WHERE name = 'ON CONFLICT DO UPDATE'; count @@ -657,18 +720,16 @@ SELECT chunk_status, chunk_name as "CHUNK_NAME" FROM compressed_chunk_info_view WHERE hypertable_name = 'compressed_ht' ORDER BY chunk_name; - chunk_status | CHUNK_NAME ---------------+------------------- - 1 | _hyper_9_10_chunk - 1 | _hyper_9_11_chunk - 9 | _hyper_9_9_chunk + chunk_status | CHUNK_NAME +--------------+-------------------- + 9 | _hyper_11_11_chunk + 1 | _hyper_11_12_chunk + 1 | _hyper_11_13_chunk (3 rows) INSERT INTO compressed_ht VALUES ('2022-01-24 01:10:28.192199+05:30', '6', 0.876, 4.123, 'new insert row') ON conflict(sensor_id, time) DO UPDATE SET sensor_id = excluded.sensor_id , name = 'ON CONFLICT DO UPDATE' RETURNING *; -INFO: Using index scan with scan keys: index 1, heap 2, memory 1. -INFO: Number of compressed rows fetched from index: 1. Number of compressed rows filtered by heap filters: 0. 
time | sensor_id | cpu | temperature | name -------------------------------------+-----------+-------+-------------+----------------------- Sun Jan 23 11:40:28.192199 2022 PST | 6 | 0.876 | 4.123 | ON CONFLICT DO UPDATE @@ -679,11 +740,11 @@ SELECT chunk_status, chunk_name as "CHUNK_NAME" FROM compressed_chunk_info_view WHERE hypertable_name = 'compressed_ht' ORDER BY chunk_name; - chunk_status | CHUNK_NAME ---------------+------------------- - 1 | _hyper_9_10_chunk - 9 | _hyper_9_11_chunk - 9 | _hyper_9_9_chunk + chunk_status | CHUNK_NAME +--------------+-------------------- + 9 | _hyper_11_11_chunk + 1 | _hyper_11_12_chunk + 9 | _hyper_11_13_chunk (3 rows) -- test for disabling DML decompression @@ -719,7 +780,7 @@ CREATE TABLE test_collation ( SELECT create_hypertable('test_collation', 'time', chunk_time_interval => 2419200000); create_hypertable ------------------------------ - (11,public,test_collation,t) + (13,public,test_collation,t) (1 row) ALTER TABLE test_collation @@ -735,10 +796,9 @@ VALUES (1609478100000, 41, 'val1') ON CONFLICT DO NOTHING; SELECT compress_chunk(ch) FROM show_chunks('test_collation') ch; -INFO: using tuplesort to scan rows from "_hyper_11_15_chunk" for compression compress_chunk ------------------------------------------ - _timescaledb_internal._hyper_11_15_chunk + _timescaledb_internal._hyper_13_17_chunk (1 row) INSERT INTO "test_collation" @@ -747,4 +807,3 @@ VALUES (41, 1609477200000, 'val1'), (41, 1609478100000, 'val1') ON CONFLICT DO NOTHING; -INFO: Using index scan with scan keys: index 1, heap 4, memory 2. 
diff --git a/tsl/test/sql/compression_conflicts.sql b/tsl/test/sql/compression_conflicts.sql index a54b9c67ede..976a6db9f39 100644 --- a/tsl/test/sql/compression_conflicts.sql +++ b/tsl/test/sql/compression_conflicts.sql @@ -385,11 +385,58 @@ SELECT count(*) FROM ONLY :CHUNK; INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 0:00:01','d1',0.1) ON CONFLICT DO NOTHING; INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 0:30:00','d1',0.1) ON CONFLICT DO NOTHING; - -- data should have move into uncompressed chunk for conflict check -- 2 segments (count = 2000) SELECT count(*) FROM ONLY :CHUNK; +-- test conflict handling on compressed hypertables with unique constraints +set timescaledb.debug_compression_path_info to on; +-- test 5: multi-column primary key with partial segmentby coverage +-- we should be using the index scan in every conflict resolution case +CREATE TABLE comp_conflicts_5(time timestamptz NOT NULL, device text, label text DEFAULT 'label', value float, UNIQUE(time, label)); + +SELECT table_name FROM create_hypertable('comp_conflicts_5','time'); +ALTER TABLE comp_conflicts_5 SET (timescaledb.compress,timescaledb.compress_segmentby='device, label'); + +-- implicitly create chunk +INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d1', 'label1', 0.1); +INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d2', 'label2', 0.2); +INSERT INTO comp_conflicts_5 VALUES ('2020-01-01',NULL, 'label3', 0.3); + +SELECT compress_chunk(c) AS "CHUNK" FROM show_chunks('comp_conflicts_5') c +\gset + +-- after compression no data should be in uncompressed chunk +SELECT count(*) FROM ONLY :CHUNK; + +-- should fail due to multiple entries with same time, device value +\set ON_ERROR_STOP 0 +INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d1', 'label1', 0.1); +INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d2', 'label2', 0.2); +INSERT INTO comp_conflicts_5 VALUES +('2020-01-01','d1', 'label', 0.1), +('2020-01-01','d2', 'label', 0.2), +('2020-01-01','d3', 'label', 0.3); +-- 
should work the same without the index present +BEGIN; + DROP INDEX _timescaledb_internal.compress_hyper_10_10_chunk_device_label__ts_meta_min_1__ts__idx; + INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d1', 'label1', 0.1); +ROLLBACK; +BEGIN; + DROP INDEX _timescaledb_internal.compress_hyper_10_10_chunk_device_label__ts_meta_min_1__ts__idx; + INSERT INTO comp_conflicts_5 VALUES ('2020-01-01','d2', 'label2', 0.2); +ROLLBACK; +BEGIN; + DROP INDEX _timescaledb_internal.compress_hyper_10_10_chunk_device_label__ts_meta_min_1__ts__idx; + INSERT INTO comp_conflicts_5 VALUES + ('2020-01-01','d1', 'label1', 0.1), + ('2020-01-01','d2', 'label2', 0.2), + ('2020-01-01','d3', 'label3', 0.3); +ROLLBACK; +\set ON_ERROR_STOP 1 +reset timescaledb.debug_compression_path_info; + + CREATE OR REPLACE VIEW compressed_chunk_info_view AS SELECT h.schema_name AS hypertable_schema,