diff --git a/CHANGES.txt b/CHANGES.txt index 225b4c90..6c1128c5 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -16,6 +16,12 @@ python 3.6 or newer (3.4/3.5 might work, but they are not tested). Fixed: +- issue2551282 - MySQL utf8mb4 issues and + issue2551115 - Use utf8mb4 as a default for MySQL instead of utf8 + The default database type and collations have been set to: + utf8mb4, utf8mb4_unicode_ci and utf8mb4_0900_bin. They are (sadly) + configurable from config.ini. Require directions on upgrading the + MySQL db have been documented in upgrading.txt. - issue2551063 - Rest/Xmlrpc interfaces needs failed login protection. Failed API login rate limiting with expiring lockout added. (John Rouillard) diff --git a/doc/upgrading.txt b/doc/upgrading.txt index 87b04100..c71f26cd 100644 --- a/doc/upgrading.txt +++ b/doc/upgrading.txt @@ -112,6 +112,128 @@ values or if a value must be changed manually. This will insert the bad API login rate limiting settings. +Update MySQL character set/collations (required) +------------------------------------------------ + +issue2551282_ and issue2551115_ discuss issues with MySQL's utf8 +support. MySQL has variations on utf8 character support. This +version of Roundup expects to use utf8mb4 which is a version of +utf8 that covers all characters, not just the ones in the basic +multilingual plane. Previous versions of Roundup used latin1 or +utf8mb3 (also known as just utf8). Newer versions of MySQL are +supposed to make utf8mb4 and not utf8mb3 the default. + +To convert your database, you need to have MySQL 8.0.11 or newer +(April 2018) and a mysql client. + +.. warning:: + + This conversion can damage your database. Back up your + database using mysqldump or other tools. Preferably on a quiet + database. Verify that your database can be restored (or at + least look up directions for restoring it). This is very + important. + +We suggest shutting down Roundup's interfaces: + + * web + * email + * cron jobs that use Python or roundup-admin + +then make your backup. + +Then connect to your mysql instance using ``mysql`` with the +information in ``config.ini``. If your tracker's ``config.ini`` +includes:: + + name = roundupdb + host = localhost + user = roundupuser + password = rounduppw + +you would run some version of:: + + mysql -u roundupuser --host localhost -p roundupdb + +and supply ``rounduppw`` when prompted. + +With the Roundup database quiet, convert the character set for the +database and then for all the tables. To convert the tables you +need a list of them. To get this run:: + + mysql -sN -u roundupuser --host localhost -p \ + -e 'show tables;' roundupdb > /tmp/tracker.tables + +The ``-sN`` removes line drawing characters and column headers +from the output. For each table ```` in the file, run:: + + ALTER TABLE `` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; + +You can automate this conversion using sed:: + + sed -e 's/^/ALTER TABLE `/' \ + -e 's/$/` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;/'\ + /tmp/tracker.tables> /tmp/tracker.tables.sql + +The backticks "`" are required as some of the table names became +MySQL reserved words during Roundup's lifetime. + +Inspect ``tracker.tables.sql`` to see if all the lines look +correct. If so then we can start the conversion. + +First convert the character set for the database by running:: + + mysql -u roundupuser --host localhost -p roundupdb + +Then at the ``mysql>`` prompt run:: + + ALTER DATABASE roundupdb CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; + +you should see: ``Query OK, 1 row affected (0.01 sec)``. + +Now to modify all the tables run: + + \. /tmp/tracker.tables.sql + +You will see output similar to:: + + Query OK, 5 rows affected (0.01 sec) + Records: 5 Duplicates: 0 Warnings: 0 + +for each table. The rows/records will depend on the number of +entries in the table. This can take a while. + +Once you have successfully completed this, copy your tracker's +config.ini to a backup file. Edit ``config.ini`` to use the defaults: + + * mysql_charset = utf8mb4 + * mysql_collation = utf8mb4_unicode_ci + * mysql_binary_collation = utf8mb4_0900_bin + +Also look for a ``~/.my.cnf`` for the roundup user and make sure +that the settings for character set (charset) are utf8mb4 compatible. + +To test, run ``roundup-admin -i tracker_home`` and display an +issue designator: e.g. ``display issue10``. Check that the text +fields are properly displayed (e.g. title). Start the web +interface and browse some issues. Again, check that the text +fields display correctly, that the history at the bottom of the +issues displays correctly and if you are using the default full +text search, make sure that that works. + +If this works, bring email cron jobs etc. back online. + +If this fails, take down the web interface, restore the database +from backup, restore the old config.ini. Then test again and +reach out to the mailing list for help. + +We can use assistance in getting these directions corrected or +enhanced. The core Roundup developers don't use MySQL for their +production workloads so we count on users to help us with this. + +.. _issue2551282: https://issues.roundup-tracker.org/issue2551282 +.. _issue2551115: https://issues.roundup-tracker.org/issue2551115 + Disable performance improvement for wsgi mode (optional) -------------------------------------------------------- diff --git a/roundup/backends/back_mysql.py b/roundup/backends/back_mysql.py index da458e4f..71fe2658 100644 --- a/roundup/backends/back_mysql.py +++ b/roundup/backends/back_mysql.py @@ -100,9 +100,13 @@ def db_create(config): kwargs = connection_dict(config) conn = MySQLdb.connect(**kwargs) cursor = conn.cursor() - command = "CREATE DATABASE %s COLLATE utf8_general_ci" % config.RDBMS_NAME + command = "CREATE DATABASE %s COLLATE %s" % (config.RDBMS_NAME, + config.RDBMS_MYSQL_COLLATION) if sys.version_info[0] > 2: - command += ' CHARACTER SET utf8' + charset = config.RDBMS_MYSQL_CHARSET + if charset == 'default': + charset = 'utf8mb4' # use full utf set. + command += ' CHARACTER SET %s' % charset logging.info(command) cursor.execute(command) conn.commit() @@ -652,11 +656,15 @@ def sql_close(self): class MysqlClass: - case_sensitive_equal = 'COLLATE utf8_bin =' + + case_sensitive_equal = None # defined by self.get_case_sensitive_equal() # TODO: AFAIK its version dependent for MySQL supports_subselects = False + def get_case_sensitive_equal(self): + return 'COLLATE %s =' % self.db.config.RDBMS_MYSQL_BINARY_COLLATION + def _subselect(self, proptree): ''' "I can't believe it's not a toy RDBMS" see, even toy RDBMSes like gadfly and sqlite can do sub-selects... diff --git a/roundup/backends/rdbms_common.py b/roundup/backends/rdbms_common.py index 5d350e43..dfe9bf1a 100644 --- a/roundup/backends/rdbms_common.py +++ b/roundup/backends/rdbms_common.py @@ -1643,7 +1643,9 @@ class Class(hyperdb.Class): case_insensitive_like = 'LIKE' # For some databases (mysql) the = operator for strings ignores case. - # We define the default here, can be changed in derivative class + # We define the default here, can be changed in derivative class. + # If set to any false value, self.get_case_sensitive_equal() is + # called to set its value. case_sensitive_equal = '=' # Some DBs order NULL values last. Set this variable in the backend @@ -1675,6 +1677,16 @@ def disableJournalling(self): """ self.do_journal = 0 + def get_case_sensitive_equal(self): + """ For some databases (mysql) the = operator for strings ignores + case. We define the default here, can be changed in derivative class. + + It takes config as an argument because mysql has multiple collations. + The admin sets both the primary and case sensitive collation in + config.ini for mysql. + """ + raise ValueError("get_case_sensitive_equal called in error") + # Editing nodes: def create(self, **propvalues): """ Create a new node of this class and return its id. @@ -2800,10 +2812,14 @@ def _filter_sql(self, search_matches, filterspec, srt=[], grp=[], retr=0, # now add to the where clause w = [] + if not self.case_sensitive_equal: + self.case_sensitive_equal = \ + self.get_case_sensitive_equal() + cse = self.case_sensitive_equal for vv, ex in zip(v, exact): if ex: w.append("_%s._%s %s %s" % ( - pln, k, self.case_sensitive_equal, a)) + pln, k, cse, a)) args.append(vv) else: w.append("_%s._%s %s %s ESCAPE %s" % ( diff --git a/roundup/configuration.py b/roundup/configuration.py index b01f5a8b..ffcf6cce 100644 --- a/roundup/configuration.py +++ b/roundup/configuration.py @@ -1495,11 +1495,23 @@ def str2value(self, value): "Name of the group to use in the MySQL defaults file (.my.cnf).\n" "Only used in MySQL connections."), (Option, 'mysql_charset', 'utf8mb4', - "Charset to use for mysql connection,\n" - "use 'default' for the mysql default, no charset option\n" - "is used when creating the connection in that case.\n" + "Charset to use for mysql connection and databases.\n" + "If set to 'default', no charset option is used when\n" + "creating the db connection and utf8mb4 is used for the\n" + "database charset.\n" "Otherwise any permissible mysql charset is allowed here.\n" "Only used in MySQL connections."), + (Option, 'mysql_collation', 'utf8mb4_unicode_ci', + "Comparison/order to use for mysql database/table collations.\n" + "When upgrading, you can use 'utf8' to match the\n" + "depricated 'utf8mb3'. This must be compatible with the\n" + "mysql_charset setting above. Only used by MySQL."), + (Option, 'mysql_binary_collation', 'utf8mb4_0900_bin', + "Comparison/order to use for mysql database/table collations\n" + "when matching case. When upgrading, you can use 'utf8_bin'\n" + "to match the depricated 'utf8mb3_bin' collation. This must\n" + "be compatible with the mysql_collation above. Only used\n" + "by MySQL."), (IntegerNumberGeqZeroOption, 'sqlite_timeout', '30', "Number of seconds to wait when the SQLite database is locked\n" "Default: use a 30 second timeout (extraordinarily generous)\n"