Skip to content

Commit

Permalink
feat: add support for ignored_chars config to sqlx_core::migrate
Browse files Browse the repository at this point in the history
  • Loading branch information
abonander committed Sep 9, 2024
1 parent 3934629 commit b67a83b
Show file tree
Hide file tree
Showing 4 changed files with 239 additions and 17 deletions.
59 changes: 56 additions & 3 deletions sqlx-core/src/migrate/migration.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use std::borrow::Cow;

use sha2::{Digest, Sha384};
use std::borrow::Cow;

use super::MigrationType;

Expand All @@ -22,8 +21,26 @@ impl Migration {
sql: Cow<'static, str>,
no_tx: bool,
) -> Self {
let checksum = Cow::Owned(Vec::from(Sha384::digest(sql.as_bytes()).as_slice()));
let checksum = checksum(&sql);

Self::with_checksum(
version,
description,
migration_type,
sql,
checksum.into(),
no_tx,
)
}

pub(crate) fn with_checksum(
version: i64,
description: Cow<'static, str>,
migration_type: MigrationType,
sql: Cow<'static, str>,
checksum: Cow<'static, [u8]>,
no_tx: bool,
) -> Self {
Migration {
version,
description,
Expand All @@ -40,3 +57,39 @@ pub struct AppliedMigration {
pub version: i64,
pub checksum: Cow<'static, [u8]>,
}

/// Compute the SHA-384 digest of a migration's SQL text.
///
/// The whole string is hashed as-is; no characters are skipped.
/// Returns the raw digest bytes.
pub fn checksum(sql: &str) -> Vec<u8> {
    Sha384::digest(sql).to_vec()
}

/// Compute the SHA-384 digest of a sequence of string fragments.
///
/// Every fragment is fed into a single hasher in iteration order, so the result
/// is the digest of the fragments' concatenation.
/// Returns the raw digest bytes.
pub fn checksum_fragments<'a>(fragments: impl Iterator<Item = &'a str>) -> Vec<u8> {
    let mut hasher = Sha384::new();

    fragments.for_each(|fragment| hasher.update(fragment));

    hasher.finalize().to_vec()
}

#[test]
fn fragments_checksum_equals_full_checksum() {
    // Hashing a string piece by piece must give the same digest as hashing it whole.
    //
    // SQL copied from `examples/postgres/axum-social-with-tests/migrations/3_comment.sql`
    let sql = "\
create table comment (\r\n\
\tcomment_id uuid primary key default gen_random_uuid(),\r\n\
\tpost_id uuid not null references post(post_id),\r\n\
\tuser_id uuid not null references \"user\"(user_id),\r\n\
\tcontent text not null,\r\n\
\tcreated_at timestamptz not null default now()\r\n\
);\r\n\
\r\n\
create index on comment(post_id, created_at);\r\n\
";

    // Splitting on the empty pattern yields one fragment per character
    // (plus an empty fragment at each end, which contributes nothing to the hash).
    let per_char_fragments = sql.split("");

    assert_eq!(checksum_fragments(per_char_fragments), checksum(sql));
}
22 changes: 22 additions & 0 deletions sqlx-core/src/migrate/migrator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ pub struct Migrator {
pub locking: bool,
#[doc(hidden)]
pub no_tx: bool,
#[doc(hidden)]
pub table_name: Cow<'static, str>,
}

fn validate_applied_migrations(
Expand Down Expand Up @@ -51,6 +53,7 @@ impl Migrator {
ignore_missing: false,
no_tx: false,
locking: true,
table_name: Cow::Borrowed("_sqlx_migrations"),
};

/// Creates a new instance with the given source.
Expand Down Expand Up @@ -81,6 +84,25 @@ impl Migrator {
})
}

/// Replace the name of the table in which executed migrations are recorded.
///
/// The name may be schema-qualified and/or contain quotes.
/// When never overridden, the table is `_sqlx_migrations`.
///
/// This can be handy for multi-tenant databases.
///
/// ### Warning: Potential Data Loss or Corruption!
/// Switching this on a production database will likely cause data loss or corruption:
/// the migration machinery loses track of which migrations were already applied
/// and will try to run them again.
///
/// Create the new table as a copy of the existing migrations table (including its rows!),
/// and make sure every instance of your application has moved over to the new
/// table before you delete the old one.
pub fn dangerous_set_table_name(&mut self, table_name: impl Into<Cow<'static, str>>) -> &Self {
    let new_name: Cow<'static, str> = table_name.into();
    self.table_name = new_name;
    self
}

/// Specify whether applied migrations that are missing from the resolved migrations should be ignored.
pub fn set_ignore_missing(&mut self, ignore_missing: bool) -> &Self {
self.ignore_missing = ignore_missing;
Expand Down
4 changes: 2 additions & 2 deletions sqlx-core/src/migrate/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub use migrate::{Migrate, MigrateDatabase};
pub use migration::{AppliedMigration, Migration};
pub use migration_type::MigrationType;
pub use migrator::Migrator;
pub use source::MigrationSource;
pub use source::{MigrationSource, ResolveConfig, ResolveWith};

#[doc(hidden)]
pub use source::resolve_blocking;
pub use source::{resolve_blocking, resolve_blocking_with_config};
171 changes: 159 additions & 12 deletions sqlx-core/src/migrate/source.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::error::BoxDynError;
use crate::migrate::{Migration, MigrationType};
use crate::migrate::{migration, Migration, MigrationType};
use futures_core::future::BoxFuture;

use std::borrow::Cow;
use std::collections::BTreeSet;
use std::fmt::Debug;
use std::fs;
use std::io;
Expand All @@ -28,19 +29,48 @@ pub trait MigrationSource<'s>: Debug {

impl<'s> MigrationSource<'s> for &'s Path {
fn resolve(self) -> BoxFuture<'s, Result<Vec<Migration>, BoxDynError>> {
// Behavior changed from previous because `canonicalize()` is potentially blocking
// since it might require going to disk to fetch filesystem data.
self.to_owned().resolve()
}
}

impl MigrationSource<'static> for PathBuf {
fn resolve(self) -> BoxFuture<'static, Result<Vec<Migration>, BoxDynError>> {
// Technically this could just be `Box::pin(spawn_blocking(...))`
// but that would actually be a breaking behavior change because it would call
// `spawn_blocking()` on the current thread
Box::pin(async move {
let canonical = self.canonicalize()?;
let migrations_with_paths =
crate::rt::spawn_blocking(move || resolve_blocking(&canonical)).await?;
crate::rt::spawn_blocking(move || {
let migrations_with_paths = resolve_blocking(&self)?;

Ok(migrations_with_paths.into_iter().map(|(m, _p)| m).collect())
Ok(migrations_with_paths.into_iter().map(|(m, _p)| m).collect())
})
.await
})
}
}

impl MigrationSource<'static> for PathBuf {
fn resolve(self) -> BoxFuture<'static, Result<Vec<Migration>, BoxDynError>> {
Box::pin(async move { self.as_path().resolve().await })
/// A [`MigrationSource`] implementation with configurable resolution.
///
/// The first field is the migration source `S`, which may be `PathBuf`, `&Path`
/// or any type that implements `Into<PathBuf>`; it is converted into the directory
/// path to resolve. The second field is the [`ResolveConfig`] used while resolving.
///
/// See [`ResolveConfig`] for details.
#[derive(Debug)]
pub struct ResolveWith<S>(pub S, pub ResolveConfig);

impl<'s, S: Debug + Into<PathBuf> + Send + 's> MigrationSource<'s> for ResolveWith<S> {
fn resolve(self) -> BoxFuture<'s, Result<Vec<Migration>, BoxDynError>> {
Box::pin(async move {
let path = self.0.into();
let config = self.1;

let migrations_with_paths =
crate::rt::spawn_blocking(move || resolve_blocking_with_config(&path, &config))
.await?;

Ok(migrations_with_paths.into_iter().map(|(m, _p)| m).collect())
})
}
}

Expand All @@ -52,11 +82,87 @@ pub struct ResolveError {
source: Option<io::Error>,
}

/// Options that control how [`ResolveWith`] resolves migrations.
#[derive(Debug, Default)]
pub struct ResolveConfig {
    // Characters skipped when hashing migration SQL; a set, so duplicates collapse.
    ignored_chars: BTreeSet<char>,
}

impl ResolveConfig {
    /// Create a configuration with nothing ignored (identical to [`Default`]).
    pub fn new() -> Self {
        Self::default()
    }

    /// Exclude a single character from migration checksums.
    ///
    /// Only the checksum calculation skips the character;
    /// the migration's SQL text is left untouched.
    ///
    /// Typical use: ignore whitespace characters so that reformatting a migration
    /// does not alter its checksum.
    ///
    /// Registering a `char` that is already ignored has no effect.
    ///
    /// ### Note: Changes Migration Checksum
    /// Resolved migrations will get a different checksum,
    /// which may cause problems with existing deployments.
    ///
    /// **Use at your own risk.**
    pub fn ignore_char(&mut self, c: char) -> &mut Self {
        self.ignore_chars(std::iter::once(c))
    }

    /// Exclude any number of characters from migration checksums.
    ///
    /// Only the checksum calculation skips these characters;
    /// the migration's SQL text is left untouched.
    ///
    /// Typical use: ignore whitespace characters so that reformatting a migration
    /// does not alter its checksum.
    ///
    /// Registering a `char` that is already ignored has no effect.
    ///
    /// ### Note: Changes Migration Checksum
    /// Resolved migrations will get a different checksum,
    /// which may cause problems with existing deployments.
    ///
    /// **Use at your own risk.**
    pub fn ignore_chars(&mut self, chars: impl IntoIterator<Item = char>) -> &mut Self {
        for c in chars {
            self.ignored_chars.insert(c);
        }
        self
    }

    /// Iterate over every ignored character.
    ///
    /// Each `char` is yielded once, regardless of how many times it was added.
    pub fn ignored_chars(&self) -> impl Iterator<Item = char> + '_ {
        self.ignored_chars.iter().copied()
    }
}

// FIXME: paths should just be part of `Migration` but we can't add a field backwards compatibly
// since it's `#[non_exhaustive]`.
#[doc(hidden)]
pub fn resolve_blocking(path: &Path) -> Result<Vec<(Migration, PathBuf)>, ResolveError> {
let s = fs::read_dir(path).map_err(|e| ResolveError {
message: format!("error reading migration directory {}: {e}", path.display()),
resolve_blocking_with_config(path, &ResolveConfig::new())
}

#[doc(hidden)]
pub fn resolve_blocking_with_config(
path: &Path,
config: &ResolveConfig,
) -> Result<Vec<(Migration, PathBuf)>, ResolveError> {
let path = path.canonicalize().map_err(|e| ResolveError {
message: format!("error canonicalizing path {}", path.display()),
source: Some(e),
})?;

let s = fs::read_dir(&path).map_err(|e| ResolveError {
message: format!("error reading migration directory {}", path.display()),
source: Some(e),
})?;

Expand All @@ -65,7 +171,7 @@ pub fn resolve_blocking(path: &Path) -> Result<Vec<(Migration, PathBuf)>, Resolv
for res in s {
let entry = res.map_err(|e| ResolveError {
message: format!(
"error reading contents of migration directory {}: {e}",
"error reading contents of migration directory {}",
path.display()
),
source: Some(e),
Expand Down Expand Up @@ -126,12 +232,15 @@ pub fn resolve_blocking(path: &Path) -> Result<Vec<(Migration, PathBuf)>, Resolv
// opt-out of migration transaction
let no_tx = sql.starts_with("-- no-transaction");

let checksum = checksum_with(&sql, &config.ignored_chars);

migrations.push((
Migration::new(
Migration::with_checksum(
version,
Cow::Owned(description),
migration_type,
Cow::Owned(sql),
checksum.into(),
no_tx,
),
entry_path,
Expand All @@ -143,3 +252,41 @@ pub fn resolve_blocking(path: &Path) -> Result<Vec<(Migration, PathBuf)>, Resolv

Ok(migrations)
}

/// Checksum `sql`, leaving every character in `ignored_chars` out of the hash.
///
/// With an empty set this is exactly `migration::checksum(sql)`.
fn checksum_with(sql: &str, ignored_chars: &BTreeSet<char>) -> Vec<u8> {
    if ignored_chars.is_empty() {
        // Fast path: hashing the whole string avoids UTF-8 decoding `sql`.
        migration::checksum(sql)
    } else {
        // Split at every ignored char and hash only the pieces in between.
        let kept_fragments = sql.split(|c: char| ignored_chars.contains(&c));
        migration::checksum_fragments(kept_fragments)
    }
}

#[test]
fn checksum_with_ignored_chars() {
    // Skipping a set of characters while hashing must produce the same digest
    // as hashing a copy of the string with those characters removed.
    const WHITESPACE: [char; 4] = [' ', '\t', '\r', '\n'];

    // SQL copied from `examples/postgres/axum-social-with-tests/migrations/3_comment.sql`
    let sql = "\
create table comment (\r\n\
\tcomment_id uuid primary key default gen_random_uuid(),\r\n\
\tpost_id uuid not null references post(post_id),\r\n\
\tuser_id uuid not null references \"user\"(user_id),\r\n\
\tcontent text not null,\r\n\
\tcreated_at timestamptz not null default now()\r\n\
);\r\n\
\r\n\
create index on comment(post_id, created_at);\r\n\
";

    let ignored: BTreeSet<char> = WHITESPACE.iter().copied().collect();
    let without_whitespace: String = sql.chars().filter(|c| !ignored.contains(c)).collect();

    assert_eq!(
        checksum_with(sql, &ignored),
        migration::checksum(&without_whitespace)
    );
}

0 comments on commit b67a83b

Please sign in to comment.