Skip to content

Commit

Permalink
sql: add transaction tests
Browse files Browse the repository at this point in the history
  • Loading branch information
erikgrinaker committed Jul 22, 2024
1 parent e04488c commit b7a736d
Show file tree
Hide file tree
Showing 12 changed files with 391 additions and 32 deletions.
70 changes: 42 additions & 28 deletions src/sql/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,34 +74,36 @@ pub mod types;

#[cfg(test)]
mod tests {
use super::engine::{Catalog as _, Session};
use super::parser::Parser;
use super::planner::{Node, Plan, OPTIMIZERS};
use crate::encoding::format::{self, Formatter as _};
use crate::sql::engine::{Engine, Local, StatementResult};
use crate::sql::planner::{Planner, Scope};
use crate::storage::engine::test::{Emit, Mirror, Operation};
use crate::storage::engine::test::{self, Emit, Mirror, Operation};
use crate::storage::{self, Engine as _};

use crossbeam::channel::Receiver;
use itertools::Itertools as _;
use std::collections::HashMap;
use std::error::Error;
use std::fmt::Write as _;
use std::result::Result;
use test_each_file::test_each_path;

use super::engine::{Catalog as _, Session};
use super::parser::Parser;
use super::planner::{Node, Plan, OPTIMIZERS};

// Run goldenscript tests in src/sql/testscripts.
test_each_path! { in "src/sql/testscripts/optimizer" as optimizer => test_goldenscript }
test_each_path! { in "src/sql/testscripts/queries" as queries => test_goldenscript }
test_each_path! { in "src/sql/testscripts/schema" as schema => test_goldenscript }
test_each_path! { in "src/sql/testscripts/transactions" as transactions => test_goldenscript }
test_each_path! { in "src/sql/testscripts/writes" as writes => test_goldenscript }
test_each_path! { in "src/sql/testscripts/expressions" as expressions => test_goldenscript_expr }

/// Runs SQL goldenscripts.
fn test_goldenscript(path: &std::path::Path) {
// Since the runner's Session can't reference an Engine stored in the
// same struct, we borrow the engine. Use both a BitCask and a Memory
// engine, and mirror operations across them. Emit engine operations to
// op_rx.
// Since the runner's Session can't reference an Engine in the same
// struct, borrow the engine. Use both a BitCask and a Memory engine,
// and mirror operations across them. Emit engine operations to op_rx.
let (op_tx, op_rx) = crossbeam::channel::unbounded();
let tempdir = tempfile::TempDir::with_prefix("toydb").expect("tempdir failed");
let bitcask =
Expand All @@ -113,30 +115,34 @@ mod tests {
goldenscript::run(&mut runner, path).expect("goldenscript failed")
}

/// Executes the expression goldenscript at `path` using an `ExpressionRunner`,
/// panicking with "goldenscript failed" if the script run returns an error.
fn test_goldenscript_expr(path: &std::path::Path) {
    let mut runner = ExpressionRunner;
    goldenscript::run(&mut runner, path).expect("goldenscript failed")
}

/// A SQL test runner.
/// The SQL test runner.
struct SQLRunner<'a> {
engine: &'a TestEngine,
session: Session<'a, TestEngine>,
sessions: HashMap<String, Session<'a, TestEngine>>,
op_rx: Receiver<Operation>,
}

type TestEngine = Local<Emit<Mirror<storage::BitCask, storage::Memory>>>;
type TestEngine = Local<test::Emit<test::Mirror<storage::BitCask, storage::Memory>>>;

impl<'a> SQLRunner<'a> {
fn new(engine: &'a TestEngine, op_rx: Receiver<Operation>) -> Self {
let session = engine.session();
Self { engine, session, op_rx }
Self { engine, sessions: HashMap::new(), op_rx }
}
}

impl<'a> goldenscript::Runner for SQLRunner<'a> {
fn run(&mut self, command: &goldenscript::Command) -> Result<String, Box<dyn Error>> {
let mut output = String::new();

// Obtain a session for the command prefix.
let prefix = command.prefix.clone().unwrap_or_default();
let session = self.sessions.entry(prefix).or_insert_with(|| self.engine.session());

// Handle runner commands.
match command.name.as_str() {
// dump
Expand All @@ -162,11 +168,11 @@ mod tests {
args.reject_rest()?;

let schemas = if tables.is_empty() {
self.session.with_txn(true, |txn| txn.list_tables())?
session.with_txn(true, |txn| txn.list_tables())?
} else {
tables
.into_iter()
.map(|t| self.session.with_txn(true, |txn| txn.must_get_table(&t)))
.map(|t| session.with_txn(true, |txn| txn.must_get_table(&t)))
.try_collect()?
};
return Ok(schemas.into_iter().map(|s| s.to_string()).join("\n"));
Expand All @@ -184,15 +190,15 @@ mod tests {
let mut tags = command.tags.clone();

// Execute the statement.
let result = self.session.execute(input)?;
let result = session.execute(input)?;

// Output optimizations if requested.
if tags.remove("opt") {
if tags.contains("plan") {
return Err("no point using both plan and opt".into());
}
let ast = Parser::new(input).parse()?;
let plan = self.session.with_txn(true, |txn| Planner::new(txn).build(ast))?;
let plan = session.with_txn(true, |txn| Planner::new(txn).build(ast))?;
let Plan::Select(mut root) = plan else {
return Err("can only use opt with SELECT plans".into());
};
Expand All @@ -211,23 +217,31 @@ mod tests {
// Output the plan if requested.
if tags.remove("plan") {
let query = format!("EXPLAIN {input}");
let StatementResult::Explain(plan) = self.session.execute(&query)? else {
let StatementResult::Explain(plan) = session.execute(&query)? else {
return Err("unexpected explain response".into());
};
writeln!(output, "{plan}")?;
}

// Output the result if requested. SELECT results are always output,
// but the column only if result is given.
if let StatementResult::Select { columns, rows } = result {
if tags.remove("header") {
writeln!(output, "{}", columns.into_iter().map(|c| c.to_string()).join(", "))?;
// Output the result if requested. SELECT results are always output.
let show_result = tags.remove("result");
match result {
StatementResult::Select { columns, rows } => {
if tags.remove("header") {
writeln!(output, "{}", columns.into_iter().join(", "))?;
}
for row in rows {
writeln!(output, "{}", row.into_iter().join(", "))?;
}
}
for row in rows {
writeln!(output, "{}", row.into_iter().map(|v| v.to_string()).join(", "))?;
StatementResult::Begin { state } if show_result => {
let version = state.version;
let kind = if state.read_only { "read-only" } else { "read-write" };
let active = state.active.iter().join(",");
writeln!(output, "v{version} {kind} active={{{active}}}")?;
}
} else if tags.remove("result") {
writeln!(output, "{result:?}")?;
result if show_result => writeln!(output, "{result:?}")?,
_ => {}
}

// Output engine ops if requested.
Expand Down
16 changes: 16 additions & 0 deletions src/sql/testscripts/transactions/anomaly_dirty_read
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# A dirty read is when c2 can read an uncommitted value set by c1. Snapshot
# isolation prevents this.

> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
---
ok

c1:> BEGIN
c1:> INSERT INTO test VALUES (1, 'a')
---
ok

c2:> BEGIN
c2:> SELECT * FROM test WHERE id = 1
---
ok
16 changes: 16 additions & 0 deletions src/sql/testscripts/transactions/anomaly_dirty_write
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# A dirty write is when c2 overwrites an uncommitted value written by c1.
# Snapshot isolation prevents this.

> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
---
ok

c1:> BEGIN
c1:> INSERT INTO test VALUES (1, 'a')
---
ok

c2:> BEGIN
c2:!> INSERT INTO test VALUES (1, 'a')
---
c2: Error: serialization failure, retry transaction
26 changes: 26 additions & 0 deletions src/sql/testscripts/transactions/anomaly_fuzzy_read
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# A fuzzy (or unrepeatable) read is when c2 sees a value change after c1
# updates it. Snapshot isolation prevents this.

> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
> INSERT INTO test VALUES (1, 'a')
---
ok

c1:> BEGIN
c2:> BEGIN
---
ok

c2:> SELECT * FROM test WHERE id = 1
---
c2: 1, 'a'

c1:> UPDATE test SET value = 'b' WHERE id = 1
c1:> COMMIT
c1:> SELECT * FROM test
---
c1: 1, 'b'

c2:> SELECT * FROM test WHERE id = 1
---
c2: 1, 'a'
26 changes: 26 additions & 0 deletions src/sql/testscripts/transactions/anomaly_lost_update
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# A lost update is when c1 and c2 both read a value and update it, where
# c2's update replaces c1's. Snapshot isolation prevents this.

> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
---
ok


c1:> BEGIN
c1:> SELECT * FROM test WHERE id = 1
---
ok

c2:> BEGIN
c2:> SELECT * FROM test WHERE id = 1
---
ok

c1:> INSERT INTO test VALUES (1, 'a')
c1:> COMMIT
---
ok

c2:!> INSERT INTO test VALUES (1, 'a')
---
c2: Error: serialization failure, retry transaction
29 changes: 29 additions & 0 deletions src/sql/testscripts/transactions/anomaly_phantom_read
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# A phantom read is when c1 reads entries matching some predicate, but a
# modification by c2 changes which entries match the predicate such that a later
# read by c1 returns a different set. Snapshot isolation prevents this.

> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
> INSERT INTO test VALUES (1, 'a'), (2, 'b'), (3, 'c')
---
ok

c1:> BEGIN
c2:> BEGIN
---
ok

c1:> SELECT * FROM test WHERE id > 1
---
c1: 2, 'b'
c1: 3, 'c'

c2:> DELETE FROM test WHERE id = 2
c2:> INSERT INTO test VALUES (4, 'd')
c2:> COMMIT
---
ok

c1:> SELECT * FROM test WHERE id > 1
---
c1: 2, 'b'
c1: 3, 'c'
26 changes: 26 additions & 0 deletions src/sql/testscripts/transactions/anomaly_read_skew
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Read skew is when c1 reads a and b, but c2 modifies b in between the
# reads. Snapshot isolation prevents this.

> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
> INSERT INTO test VALUES (1, 'a'), (2, 'b')
---
ok

c1:> BEGIN
c2:> BEGIN
---
ok

c1:> SELECT * FROM test WHERE id = 1
---
c1: 1, 'a'

c2:> UPDATE test SET value = 'b' WHERE id = 1
c2:> UPDATE test SET value = 'a' WHERE id = 2
c2:> COMMIT
---
ok

c1:> SELECT * FROM test WHERE id = 2
---
c1: 2, 'b'
35 changes: 35 additions & 0 deletions src/sql/testscripts/transactions/anomaly_write_skew
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Write skew is when c1 reads a and writes it to b while c2 reads b and writes
# it to a. Snapshot isolation does not prevent this, which is expected, so we
# assert the anomalous behavior. Fixing this would require implementing
# serializable snapshot isolation.

> CREATE TABLE test (id INT PRIMARY KEY, value STRING)
> INSERT INTO test VALUES (1, 'a'), (2, 'b')
---
ok

c1:> BEGIN
c2:> BEGIN
---
ok

c1:> SELECT * FROM test WHERE id = 1
c2:> SELECT * FROM test WHERE id = 2
---
c1: 1, 'a'
c2: 2, 'b'

c1:> UPDATE test SET value = 'a' WHERE id = 2
c2:> UPDATE test SET value = 'b' WHERE id = 1
---
ok

c1:> COMMIT
c2:> COMMIT
---
ok

> SELECT * FROM test
---
1, 'b'
2, 'a'
Loading

0 comments on commit b7a736d

Please sign in to comment.