From a814631ba8e21e5d816b2aaccc0f097d776359df Mon Sep 17 00:00:00 2001 From: Dan Book Date: Tue, 12 Mar 2024 02:16:14 -0400 Subject: [PATCH] Re-add documentation on how to workaround UTF-8 bug Document a hack to ensure consistency in string interpretation with the mysql_enable_utf8 flag. Adapted from PR #119 by Pali, which was reverted with the rest of the 4.042 changes. --- lib/DBD/mysql.pm | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/lib/DBD/mysql.pm b/lib/DBD/mysql.pm index 903a0ef3..24561790 100644 --- a/lib/DBD/mysql.pm +++ b/lib/DBD/mysql.pm @@ -1533,6 +1533,40 @@ be treated as UTF-8. This will only take effect if used as part of the call to connect(). If you turn the flag on after connecting, you will need to issue the command C<SET NAMES utf8> to get the same effect. +This flag's implementation suffers the "Unicode Bug" on passed statements and +input bind parameters, and cannot be fixed for historical reasons. In order to +pass strings with Unicode characters consistently through DBD::mysql, you can +use a "hack" workaround of calling the C<utf8::upgrade()> function on scalars +immediately before passing them to DBD::mysql. Calling the C<utf8::upgrade()> +function has absolutely no effect on (correctly written) Perl code, but forces +DBD::mysql to interpret it correctly as text data to be encoded. In the same +way, binary (byte) data can be passed through DBD::mysql without being encoded +as text data by calling the C<utf8::downgrade()> function (it dies on wide +Unicode strings with codepoints above U+FF). See the following example: + + # check that last name contains LATIN CAPITAL LETTER O WITH STROKE (U+D8) + my $statement = "SELECT * FROM users WHERE last_name LIKE '%\x{D8}%' AND first_name = ? 
AND data = ?"; + + my $wide_string_param = "Andr\x{E9}"; # Andre with LATIN SMALL LETTER E WITH ACUTE (U+E9) + + my $byte_param = "\x{D8}\x{A0}\x{39}\x{F8}"; # some bytes (binary data) + + my $dbh = DBI->connect('DBI:mysql:database', 'username', 'pass', { mysql_enable_utf8mb4 => 1 }); + + utf8::upgrade($statement); # UTF-8 fix for DBD::mysql + my $sth = $dbh->prepare($statement); + + utf8::upgrade($wide_string_param); # UTF-8 fix for DBD::mysql + $sth->bind_param(1, $wide_string_param); + + utf8::downgrade($byte_param); # byte fix for DBD::mysql + $sth->bind_param(2, $byte_param, DBI::SQL_BINARY); # set correct binary type + + $sth->execute(); + + my $output = $sth->fetchall_arrayref(); + # returned data in $output reference should be already UTF-8 decoded as appropriate + =item mysql_enable_utf8mb4 This is similar to mysql_enable_utf8, but is capable of handling 4-byte