sql: add transaction tests

erikgrinaker · Jul 22, 2024 · b7a736d · b7a736d
1 parent e04488c
commit b7a736d
Show file tree

Hide file tree

Showing 12 changed files with 391 additions and 32 deletions.
diff --git a/src/sql/mod.rs b/src/sql/mod.rs
@@ -74,34 +74,36 @@ pub mod types;
 
 #[cfg(test)]
 mod tests {
+    use super::engine::{Catalog as _, Session};
+    use super::parser::Parser;
+    use super::planner::{Node, Plan, OPTIMIZERS};
     use crate::encoding::format::{self, Formatter as _};
     use crate::sql::engine::{Engine, Local, StatementResult};
     use crate::sql::planner::{Planner, Scope};
-    use crate::storage::engine::test::{Emit, Mirror, Operation};
+    use crate::storage::engine::test::{self, Emit, Mirror, Operation};
     use crate::storage::{self, Engine as _};
+
     use crossbeam::channel::Receiver;
     use itertools::Itertools as _;
+    use std::collections::HashMap;
     use std::error::Error;
     use std::fmt::Write as _;
     use std::result::Result;
     use test_each_file::test_each_path;
 
-    use super::engine::{Catalog as _, Session};
-    use super::parser::Parser;
-    use super::planner::{Node, Plan, OPTIMIZERS};
-
     // Run goldenscript tests in src/sql/testscripts.
     test_each_path! { in "src/sql/testscripts/optimizer" as optimizer => test_goldenscript }
     test_each_path! { in "src/sql/testscripts/queries" as queries => test_goldenscript }
     test_each_path! { in "src/sql/testscripts/schema" as schema => test_goldenscript }
+    test_each_path! { in "src/sql/testscripts/transactions" as transactions => test_goldenscript }
     test_each_path! { in "src/sql/testscripts/writes" as writes => test_goldenscript }
     test_each_path! { in "src/sql/testscripts/expressions" as expressions => test_goldenscript_expr }
 
+    /// Runs SQL goldenscripts.
     fn test_goldenscript(path: &std::path::Path) {
-        // Since the runner's Session can't reference an Engine stored in the
-        // same struct, we borrow the engine. Use both a BitCask and a Memory
-        // engine, and mirror operations across them. Emit engine operations to
-        // op_rx.
+        // Since the runner's Session can't reference an Engine in the same
+        // struct, borrow the engine. Use both a BitCask and a Memory engine,
+        // and mirror operations across them. Emit engine operations to op_rx.
         let (op_tx, op_rx) = crossbeam::channel::unbounded();
         let tempdir = tempfile::TempDir::with_prefix("toydb").expect("tempdir failed");
         let bitcask =
@@ -113,30 +115,34 @@ mod tests {
         goldenscript::run(&mut runner, path).expect("goldenscript failed")
     }
 
+    /// Runs expression goldenscripts.
     fn test_goldenscript_expr(path: &std::path::Path) {
         goldenscript::run(&mut ExpressionRunner, path).expect("goldenscript failed")
     }
 
-    /// A SQL test runner.
+    /// The SQL test runner.
     struct SQLRunner<'a> {
         engine: &'a TestEngine,
-        session: Session<'a, TestEngine>,
+        sessions: HashMap<String, Session<'a, TestEngine>>,
         op_rx: Receiver<Operation>,
     }
 
-    type TestEngine = Local<Emit<Mirror<storage::BitCask, storage::Memory>>>;
+    type TestEngine = Local<test::Emit<test::Mirror<storage::BitCask, storage::Memory>>>;
 
     impl<'a> SQLRunner<'a> {
         fn new(engine: &'a TestEngine, op_rx: Receiver<Operation>) -> Self {
-            let session = engine.session();
-            Self { engine, session, op_rx }
+            Self { engine, sessions: HashMap::new(), op_rx }
         }
     }
 
     impl<'a> goldenscript::Runner for SQLRunner<'a> {
         fn run(&mut self, command: &goldenscript::Command) -> Result<String, Box<dyn Error>> {
             let mut output = String::new();
 
+            // Obtain a session for the command prefix.
+            let prefix = command.prefix.clone().unwrap_or_default();
+            let session = self.sessions.entry(prefix).or_insert_with(|| self.engine.session());
+
             // Handle runner commands.
             match command.name.as_str() {
                 // dump
@@ -162,11 +168,11 @@ mod tests {
                     args.reject_rest()?;
 
                     let schemas = if tables.is_empty() {
-                        self.session.with_txn(true, |txn| txn.list_tables())?
+                        session.with_txn(true, |txn| txn.list_tables())?
                     } else {
                         tables
                             .into_iter()
-                            .map(|t| self.session.with_txn(true, |txn| txn.must_get_table(&t)))
+                            .map(|t| session.with_txn(true, |txn| txn.must_get_table(&t)))
                             .try_collect()?
                     };
                     return Ok(schemas.into_iter().map(|s| s.to_string()).join("\n"));
@@ -184,15 +190,15 @@ mod tests {
             let mut tags = command.tags.clone();
 
             // Execute the statement.
-            let result = self.session.execute(input)?;
+            let result = session.execute(input)?;
 
             // Output optimizations if requested.
             if tags.remove("opt") {
                 if tags.contains("plan") {
                     return Err("no point using both plan and opt".into());
                 }
                 let ast = Parser::new(input).parse()?;
-                let plan = self.session.with_txn(true, |txn| Planner::new(txn).build(ast))?;
+                let plan = session.with_txn(true, |txn| Planner::new(txn).build(ast))?;
                 let Plan::Select(mut root) = plan else {
                     return Err("can only use opt with SELECT plans".into());
                 };
@@ -211,23 +217,31 @@ mod tests {
             // Output the plan if requested.
             if tags.remove("plan") {
                 let query = format!("EXPLAIN {input}");
-                let StatementResult::Explain(plan) = self.session.execute(&query)? else {
+                let StatementResult::Explain(plan) = session.execute(&query)? else {
                     return Err("unexpected explain response".into());
                 };
                 writeln!(output, "{plan}")?;
             }
 
-            // Output the result if requested. SELECT results are always output,
-            // but the column only if result is given.
-            if let StatementResult::Select { columns, rows } = result {
-                if tags.remove("header") {
-                    writeln!(output, "{}", columns.into_iter().map(|c| c.to_string()).join(", "))?;
+            // Output the result if requested. SELECT results are always output.
+            let show_result = tags.remove("result");
+            match result {
+                StatementResult::Select { columns, rows } => {
+                    if tags.remove("header") {
+                        writeln!(output, "{}", columns.into_iter().join(", "))?;
+                    }
+                    for row in rows {
+                        writeln!(output, "{}", row.into_iter().join(", "))?;
+                    }
                 }
-                for row in rows {
-                    writeln!(output, "{}", row.into_iter().map(|v| v.to_string()).join(", "))?;
+                StatementResult::Begin { state } if show_result => {
+                    let version = state.version;
+                    let kind = if state.read_only { "read-only" } else { "read-write" };
+                    let active = state.active.iter().join(",");
+                    writeln!(output, "v{version} {kind} active={{{active}}}")?;
                 }
-            } else if tags.remove("result") {
-                writeln!(output, "{result:?}")?;
+                result if show_result => writeln!(output, "{result:?}")?,
+                _ => {}
             }
 
             // Output engine ops if requested.

diff --git a/src/sql/testscripts/transactions/anomaly_dirty_read b/src/sql/testscripts/transactions/anomaly_dirty_read
@@ -0,0 +1,16 @@
+# A dirty read is when c2 can read an uncommitted value set by c1. Snapshot
+# isolation prevents this.
+
+> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
+---
+ok
+
+c1:> BEGIN
+c1:> INSERT INTO test VALUES (1, 'a')
+---
+ok
+
+c2:> BEGIN
+c2:> SELECT * FROM test WHERE id = 1
+---
+ok
diff --git a/src/sql/testscripts/transactions/anomaly_dirty_write b/src/sql/testscripts/transactions/anomaly_dirty_write
@@ -0,0 +1,16 @@
+# A dirty write is when c2 overwrites an uncommitted value written by c1.
+# Snapshot isolation prevents this.
+
+> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
+---
+ok
+
+c1:> BEGIN
+c1:> INSERT INTO test VALUES (1, 'a')
+---
+ok
+
+c2:> BEGIN
+c2:!> INSERT INTO test VALUES (1, 'a')
+---
+c2: Error: serialization failure, retry transaction
diff --git a/src/sql/testscripts/transactions/anomaly_fuzzy_read b/src/sql/testscripts/transactions/anomaly_fuzzy_read
@@ -0,0 +1,26 @@
+# A fuzzy (or unrepeatable) read is when c2 sees a value change after c1
+# updates it. Snapshot isolation prevents this.
+
+> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
+> INSERT INTO test VALUES (1, 'a')
+---
+ok
+
+c1:> BEGIN
+c2:> BEGIN
+---
+ok
+
+c2:> SELECT * FROM test WHERE id = 1
+---
+c2: 1, 'a'
+
+c1:> UPDATE test SET value = 'b' WHERE id = 1
+c1:> COMMIT
+c1:> SELECT * FROM test
+---
+c1: 1, 'b'
+
+c2:> SELECT * FROM test WHERE id = 1
+---
+c2: 1, 'a'
diff --git a/src/sql/testscripts/transactions/anomaly_lost_update b/src/sql/testscripts/transactions/anomaly_lost_update
@@ -0,0 +1,26 @@
+# A lost update is when c1 and c2 both read a value and update it, where
+# c2's update replaces c1's. Snapshot isolation prevents this.
+
+> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
+---
+ok
+
+
+c1:> BEGIN
+c1:> SELECT * FROM test WHERE id = 1
+---
+ok
+
+c2:> BEGIN
+c2:> SELECT * FROM test WHERE id = 1
+---
+ok
+
+c1:> INSERT INTO test VALUES (1, 'a')
+c1:> COMMIT
+---
+ok
+
+c2:!> INSERT INTO test VALUES (1, 'a')
+---
+c2: Error: serialization failure, retry transaction
diff --git a/src/sql/testscripts/transactions/anomaly_phantom_read b/src/sql/testscripts/transactions/anomaly_phantom_read
@@ -0,0 +1,29 @@
+# A phantom read is when c1 reads entries matching some predicate, but a
+# modification by c2 changes which entries match the predicate such that a later
+# read by c1 returns a different result. Snapshot isolation prevents this.
+
+> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
+> INSERT INTO test VALUES (1, 'a'), (2, 'b'), (3, 'c')
+---
+ok
+
+c1:> BEGIN
+c2:> BEGIN
+---
+ok
+
+c1:> SELECT * FROM test WHERE id > 1
+---
+c1: 2, 'b'
+c1: 3, 'c'
+
+c2:> DELETE FROM test WHERE id = 2
+c2:> INSERT INTO test VALUES (4, 'd')
+c2:> COMMIT
+---
+ok
+
+c1:> SELECT * FROM test WHERE id > 1
+---
+c1: 2, 'b'
+c1: 3, 'c'
diff --git a/src/sql/testscripts/transactions/anomaly_read_skew b/src/sql/testscripts/transactions/anomaly_read_skew
@@ -0,0 +1,26 @@
+# Read skew is when c1 reads a and b, but c2 modifies b in between the
+# reads. Snapshot isolation prevents this.
+
+> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
+> INSERT INTO test VALUES (1, 'a'), (2, 'b')
+---
+ok
+
+c1:> BEGIN
+c2:> BEGIN
+---
+ok
+
+c1:> SELECT * FROM test WHERE id = 1
+---
+c1: 1, 'a'
+
+c2:> UPDATE test SET value = 'b' WHERE id = 1
+c2:> UPDATE test SET value = 'a' WHERE id = 2
+c2:> COMMIT
+---
+ok
+
+c1:> SELECT * FROM test WHERE id = 2
+---
+c1: 2, 'b'
diff --git a/src/sql/testscripts/transactions/anomaly_write_skew b/src/sql/testscripts/transactions/anomaly_write_skew
@@ -0,0 +1,35 @@
+# Write skew is when c1 reads a and writes it to b while c2 reads b and writes
+# it to a. Snapshot isolation does not prevent this, which is expected, so we
+# assert the anomalous behavior. Fixing this would require implementing
+# serializable snapshot isolation.
+
+> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
+> INSERT INTO test VALUES (1, 'a'), (2, 'b')
+---
+ok
+
+c1:> BEGIN
+c2:> BEGIN
+---
+ok
+
+c1:> SELECT * FROM test WHERE id = 1
+c2:> SELECT * FROM test WHERE id = 2
+---
+c1: 1, 'a'
+c2: 2, 'b'
+
+c1:> UPDATE test SET value = 'a' WHERE id = 2
+c2:> UPDATE test SET value = 'b' WHERE id = 1
+---
+ok
+
+c1:> COMMIT
+c2:> COMMIT
+---
+ok
+
+> SELECT * FROM test
+---
+1, 'b'
+2, 'a'