Skip to content

Commit

Permalink
chore: test ignored_chars with U+FEFF (ZWNBSP/BOM)
Browse files Browse the repository at this point in the history
  • Loading branch information
abonander committed Sep 9, 2024
1 parent b67a83b commit 6ff4a56
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 17 deletions.
6 changes: 5 additions & 1 deletion sqlx-core/src/config/reference.toml
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,11 @@ migrations_dir = "foo/migrations"
# ignored_chars = ["\r"]

# Ignore common whitespace characters (beware syntactically significant whitespace!)
ignored_chars = [" ", "\t", "\r", "\n"] # Space, tab, CR, LF
# Space, tab, CR, LF, zero-width non-breaking space (U+FEFF)
#
# U+FEFF is added by some editors as a magic number at the beginning of a text file indicating it is UTF-8 encoded,
# where it is known as a byte-order mark (BOM): https://en.wikipedia.org/wiki/Byte_order_mark
ignored_chars = [" ", "\t", "\r", "\n", "\uFEFF"]

# Specify reversible migrations by default (for `sqlx migrate create`).
#
Expand Down
2 changes: 1 addition & 1 deletion sqlx-core/src/config/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ fn assert_migrate_config(config: &config::migrate::Config) {
assert_eq!(config.table_name.as_deref(), Some("foo._sqlx_migrations"));
assert_eq!(config.migrations_dir.as_deref(), Some("foo/migrations"));

let ignored_chars = BTreeSet::from([' ', '\t', '\r', '\n']);
let ignored_chars = BTreeSet::from([' ', '\t', '\r', '\n', '\u{FEFF}']);

assert_eq!(config.ignored_chars, ignored_chars);

Expand Down
2 changes: 1 addition & 1 deletion sqlx-core/src/migrate/migration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ pub fn checksum_fragments<'a>(fragments: impl Iterator<Item = &'a str>) -> Vec<u
fn fragments_checksum_equals_full_checksum() {
// Copied from `examples/postgres/axum-social-with-tests/migrations/3_comment.sql`
let sql = "\
create table comment (\r\n\
\u{FEFF}create table comment (\r\n\
\tcomment_id uuid primary key default gen_random_uuid(),\r\n\
\tpost_id uuid not null references post(post_id),\r\n\
\tuser_id uuid not null references \"user\"(user_id),\r\n\
Expand Down
34 changes: 20 additions & 14 deletions sqlx-core/src/migrate/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ impl MigrationSource<'static> for PathBuf {
}

/// A [`MigrationSource`] implementation with configurable resolution.
///
///
/// `S` may be `PathBuf`, `&Path` or any type that implements `Into<PathBuf>`.
///
///
/// See [`ResolveConfig`] for details.
#[derive(Debug)]
// Tuple fields: `.0` is the source value `S`, `.1` is the `ResolveConfig` paired with it.
pub struct ResolveWith<S>(pub S, pub ResolveConfig);
Expand Down Expand Up @@ -97,20 +97,20 @@ impl ResolveConfig {
}

/// Ignore a character when hashing migrations.
///
///
/// The migration SQL string itself will still contain the character,
/// but it will not be included when calculating the checksum.
///
///
/// This can be used to ignore whitespace characters so changing formatting
/// does not change the checksum.
///
///
/// Adding the same `char` more than once is a no-op.
///
///
/// ### Note: Changes Migration Checksum
/// This will change the checksum of resolved migrations,
/// This will change the checksum of resolved migrations,
/// which may cause problems with existing deployments.
///
/// **Use at your own risk.**
/// **Use at your own risk.**
pub fn ignore_char(&mut self, c: char) -> &mut Self {
self.ignored_chars.insert(c);
self
Expand All @@ -123,21 +123,21 @@ impl ResolveConfig {
///
/// This can be used to ignore whitespace characters so changing formatting
/// does not change the checksum.
///
///
/// Adding the same `char` more than once is a no-op.
///
/// ### Note: Changes Migration Checksum
/// This will change the checksum of resolved migrations,
/// This will change the checksum of resolved migrations,
/// which may cause problems with existing deployments.
///
/// **Use at your own risk.**
/// **Use at your own risk.**
pub fn ignore_chars(&mut self, chars: impl IntoIterator<Item = char>) -> &mut Self {
// Add every `char` yielded by `chars` to the ignored set in a single call.
self.ignored_chars.extend(chars);
// Hand back `&mut Self` so further configuration calls can be chained.
self
}

/// Iterate over the set of ignored characters.
///
///
/// Duplicate `char`s are not included.
pub fn ignored_chars(&self) -> impl Iterator<Item = char> + '_ {
self.ignored_chars.iter().copied()
Expand Down Expand Up @@ -266,11 +266,17 @@ fn checksum_with(sql: &str, ignored_chars: &BTreeSet<char>) -> Vec<u8> {
fn checksum_with_ignored_chars() {
// Ensure that `checksum_with` returns the same digest for a given set of ignored chars
// as the equivalent string with the characters removed.
let ignored_chars = [' ', '\t', '\r', '\n'];
let ignored_chars = [
' ', '\t', '\r', '\n',
// Zero-width non-breaking space (ZWNBSP), often added as a magic number at the beginning
// of UTF-8 encoded files as a byte-order mark (BOM):
// https://en.wikipedia.org/wiki/Byte_order_mark
'\u{FEFF}',
];

// Copied from `examples/postgres/axum-social-with-tests/migrations/3_comment.sql`
let sql = "\
create table comment (\r\n\
\u{FEFF}create table comment (\r\n\
\tcomment_id uuid primary key default gen_random_uuid(),\r\n\
\tpost_id uuid not null references post(post_id),\r\n\
\tuser_id uuid not null references \"user\"(user_id),\r\n\
Expand Down

0 comments on commit 6ff4a56

Please sign in to comment.