chore: test ignored_chars with U+FEFF (ZWNBSP/BOM)

https://en.wikipedia.org/wiki/Byte_order_mark
launchbadge · Sep 9, 2024 · 6ff4a56 · 6ff4a56
1 parent b67a83b
commit 6ff4a56
Show file tree

Hide file tree

Showing 4 changed files with 27 additions and 17 deletions.
diff --git a/sqlx-core/src/config/reference.toml b/sqlx-core/src/config/reference.toml
@@ -155,7 +155,11 @@ migrations_dir = "foo/migrations"
 # ignored_chars = ["\r"]
 
 # Ignore common whitespace characters (beware syntactically significant whitespace!)
-ignored_chars = [" ", "\t", "\r", "\n"] # Space, tab, CR, LF
+# Space, tab, CR, LF, zero-width non-breaking space (U+FEFF)
+#
+# Some editors add U+FEFF at the beginning of a text file as a magic number indicating that the file is UTF-8 encoded;
+# in that position it is known as a byte-order mark (BOM): https://en.wikipedia.org/wiki/Byte_order_mark
+ignored_chars = [" ", "\t", "\r", "\n", "\uFEFF"]
 
 # Specify reversible migrations by default (for `sqlx migrate create`).
 #

diff --git a/sqlx-core/src/config/tests.rs b/sqlx-core/src/config/tests.rs
@@ -81,7 +81,7 @@ fn assert_migrate_config(config: &config::migrate::Config) {
     assert_eq!(config.table_name.as_deref(), Some("foo._sqlx_migrations"));
     assert_eq!(config.migrations_dir.as_deref(), Some("foo/migrations"));
 
-    let ignored_chars = BTreeSet::from([' ', '\t', '\r', '\n']);
+    let ignored_chars = BTreeSet::from([' ', '\t', '\r', '\n', '\u{FEFF}']);
 
     assert_eq!(config.ignored_chars, ignored_chars);
 

diff --git a/sqlx-core/src/migrate/migration.rs b/sqlx-core/src/migrate/migration.rs
@@ -76,7 +76,7 @@ pub fn checksum_fragments<'a>(fragments: impl Iterator<Item = &'a str>) -> Vec<u
 fn fragments_checksum_equals_full_checksum() {
     // Copied from `examples/postgres/axum-social-with-tests/migrations/3_comment.sql`
     let sql = "\
-        create table comment (\r\n\
+        \u{FEFF}create table comment (\r\n\
             \tcomment_id uuid primary key default gen_random_uuid(),\r\n\
             \tpost_id uuid not null references post(post_id),\r\n\
             \tuser_id uuid not null references \"user\"(user_id),\r\n\

diff --git a/sqlx-core/src/migrate/source.rs b/sqlx-core/src/migrate/source.rs
@@ -52,9 +52,9 @@ impl MigrationSource<'static> for PathBuf {
 }
 
 /// A [`MigrationSource`] implementation with configurable resolution.
-/// 
+///
 /// `S` may be `PathBuf`, `&Path` or any type that implements `Into<PathBuf>`.
-/// 
+///
 /// See [`ResolveConfig`] for details.
 #[derive(Debug)]
 pub struct ResolveWith<S>(pub S, pub ResolveConfig);
@@ -97,20 +97,20 @@ impl ResolveConfig {
     }
 
     /// Ignore a character when hashing migrations.
-    /// 
+    ///
     /// The migration SQL string itself will still contain the character,
     /// but it will not be included when calculating the checksum.
-    /// 
+    ///
     /// This can be used to ignore whitespace characters so changing formatting
     /// does not change the checksum.
-    /// 
+    ///
     /// Adding the same `char` more than once is a no-op.
-    /// 
+    ///
     /// ### Note: Changes Migration Checksum
-    /// This will change the checksum of resolved migrations, 
+    /// This will change the checksum of resolved migrations,
     /// which may cause problems with existing deployments.
     ///
-    /// **Use at your own risk.** 
+    /// **Use at your own risk.**
     pub fn ignore_char(&mut self, c: char) -> &mut Self {
         self.ignored_chars.insert(c);
         self
@@ -123,21 +123,21 @@ impl ResolveConfig {
     ///
     /// This can be used to ignore whitespace characters so changing formatting
     /// does not change the checksum.
-    /// 
+    ///
     /// Adding the same `char` more than once is a no-op.
     ///
     /// ### Note: Changes Migration Checksum
-    /// This will change the checksum of resolved migrations, 
+    /// This will change the checksum of resolved migrations,
     /// which may cause problems with existing deployments.
     ///
-    /// **Use at your own risk.** 
+    /// **Use at your own risk.**
     pub fn ignore_chars(&mut self, chars: impl IntoIterator<Item = char>) -> &mut Self {
         self.ignored_chars.extend(chars);
         self
     }
 
     /// Iterate over the set of ignored characters.
-    /// 
+    ///
     /// Duplicate `char`s are not included.
     pub fn ignored_chars(&self) -> impl Iterator<Item = char> + '_ {
         self.ignored_chars.iter().copied()
@@ -266,11 +266,17 @@ fn checksum_with(sql: &str, ignored_chars: &BTreeSet<char>) -> Vec<u8> {
 fn checksum_with_ignored_chars() {
     // Ensure that `checksum_with` returns the same digest for a given set of ignored chars
     // as the equivalent string with the characters removed.
-    let ignored_chars = [' ', '\t', '\r', '\n'];
+    let ignored_chars = [
+        ' ', '\t', '\r', '\n',
+        // Zero-width non-breaking space (ZWNBSP), often added as a magic number at the beginning
+        // of UTF-8 encoded files as a byte-order mark (BOM):
+        // https://en.wikipedia.org/wiki/Byte_order_mark
+        '\u{FEFF}',
+    ];
 
     // Copied from `examples/postgres/axum-social-with-tests/migrations/3_comment.sql`
     let sql = "\
-        create table comment (\r\n\
+        \u{FEFF}create table comment (\r\n\
             \tcomment_id uuid primary key default gen_random_uuid(),\r\n\
             \tpost_id uuid not null references post(post_id),\r\n\
             \tuser_id uuid not null references \"user\"(user_id),\r\n\