Skip to content

Commit

Permalink
Change DB structure, improve LMC behaviour
Browse files Browse the repository at this point in the history
Please, backup your DB before performing any testing with changes in code.

Introduce new DB structure and DB migration mechanism.

Now LMC messages are interconnected with original messages,
this way we have fast access to the last (hence correct) applicable edits,
as well as a reference to the original message from any edit (in case of chained edits).

Change the way LMC messages are being displayed. Now we check if we
can replace a message from current buffer. If we don't have a message in
the buffer, it might've been lost, but we can still display it as a
new message.

Index `timestamp`, `to_jid`, `from_jid` columns to improve performance.

Further information available here:
profanity-im#1893
profanity-im#1899
profanity-im#1902
  • Loading branch information
H3rnand3zzz committed Nov 1, 2023
1 parent 7eee96d commit 29b6699
Show file tree
Hide file tree
Showing 3 changed files with 265 additions and 56 deletions.
272 changes: 241 additions & 31 deletions src/database.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "config.h"

#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sqlite3.h>
#include <glib.h>
#include <stdio.h>
Expand All @@ -58,6 +59,9 @@ static void _add_to_db(ProfMessage* message, char* type, const Jid* const from_j
static char* _get_db_filename(ProfAccount* account);
static prof_msg_type_t _get_message_type_type(const char* const type);
static prof_enc_t _get_message_enc_type(const char* const encstr);
static int _get_db_version(void);
static gboolean _migrate_to_v2(void);
static gboolean _check_available_space_for_db_migration(char* path_to_db);

#define auto_sqlite __attribute__((__cleanup__(auto_free_sqlite)))

Expand Down Expand Up @@ -97,13 +101,17 @@ log_database_init(ProfAccount* account)
}

char* err_msg;
// id is the ID of DB the entry
// from_jid is the senders jid
// to_jid is the receivers jid
// from_resource is the senders resource
// to_jid is the receivers resource
// message is the message text
// timestamp the timestamp like "2020/03/24 11:12:14"

// ChatLogs Table (Version 1)
// Contains all chat messages
//
// id is primary key
// from_jid is the sender's jid
// to_jid is the receiver's jid
// from_resource is the sender's resource
// to_resource is the receiver's resource
// message is the message's text
// timestamp is the timestamp like "2020/03/24 11:12:14"
// type is there to distinguish: message (chat), MUC message (muc), muc pm (mucpm)
// stanza_id is the ID in <message>
// archive_id is the stanza-id from XEP-0359: Unique and Stable Stanza IDs used for XEP-0313: Message Archive Management
Expand All @@ -115,16 +123,37 @@ log_database_init(ProfAccount* account)
goto out;
}

query = "CREATE TABLE IF NOT EXISTS `DbVersion` ( `dv_id` INTEGER PRIMARY KEY, `version` INTEGER UNIQUE)";
gboolean database_exists = (_get_db_version() != -1);

query = "CREATE TABLE IF NOT EXISTS `DbVersion` (`dv_id` INTEGER PRIMARY KEY, `version` INTEGER UNIQUE)";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
goto out;
}

query = "INSERT OR IGNORE INTO `DbVersion` (`version`) VALUES('1')";
query = "INSERT OR IGNORE INTO `DbVersion` (`version`) VALUES ('1')";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
goto out;
}

int db_version = _get_db_version();
if (db_version == -1) {
cons_show_error("DB Initialization Error: Unable to check DB version.");
goto out;
}

if (db_version < 2) {
if (database_exists) {
cons_show("Migrating database schema. This operation may take a while...");
}
if (!_check_available_space_for_db_migration(filename) || !_migrate_to_v2()) {
cons_show_error("Database Initialization Error: Unable to migrate database to version 2. Please, check error logs for details.");
goto out;
}
if (database_exists) {
cons_show("Database schema migration was successful.");
}
}

log_debug("Initialized SQLite database: %s", filename);
return TRUE;

Expand Down Expand Up @@ -260,16 +289,17 @@ log_database_get_previous_chat(const gchar* const contact_barejid, const char* s
auto_sqlite gchar* query = sqlite3_mprintf("SELECT * FROM ("
"SELECT COALESCE(B.`message`, A.`message`) AS message, "
"A.`timestamp`, A.`from_jid`, A.`type`, A.`encryption` FROM `ChatLogs` AS A "
"LEFT JOIN `ChatLogs` AS B ON (A.`stanza_id` = B.`replace_id` AND A.`from_jid` = B.`from_jid`) "
"WHERE A.`replace_id` = '' "
"LEFT JOIN `ChatLogs` AS B ON (A.`replaced_by_db_id` = B.`id` AND A.`from_jid` = B.`from_jid`) "
"WHERE (A.`replaces_db_id` IS NULL OR A.`replaces_db_id` = '') "
"AND ((A.`from_jid` = '%q' AND A.`to_jid` = '%q') OR (A.`from_jid` = '%q' AND A.`to_jid` = '%q')) "
"AND A.`timestamp` < '%q' "
"AND (%Q IS NULL OR A.`timestamp` > %Q) "
"ORDER BY A.`timestamp` %s LIMIT %d) "
"ORDER BY A.`timestamp` %s LIMIT %d) " // order by B.`timestamp` as well on join, but a bit later
"ORDER BY `timestamp` %s;",
contact_barejid, myjid->barejid, myjid->barejid, contact_barejid, end_date_fmt, start_time, start_time, sort1, MESSAGES_TO_RETRIEVE, sort2);

g_date_time_unref(now);
log_warning(query); // DBG DEBUG REMOVE!

if (!query) {
log_error("Could not allocate memory");
Expand All @@ -278,7 +308,7 @@ log_database_get_previous_chat(const gchar* const contact_barejid, const char* s

int rc = sqlite3_prepare_v2(g_chatlog_database, query, -1, &stmt, NULL);
if (rc != SQLITE_OK) {
log_error("Unknown SQLite error in log_database_get_previous_chat()");
log_error("SQLite error in log_database_get_previous_chat(): %s", sqlite3_errmsg(g_chatlog_database));
return NULL;
}

Expand Down Expand Up @@ -375,6 +405,8 @@ static void
_add_to_db(ProfMessage* message, char* type, const Jid* const from_jid, const Jid* const to_jid)
{
auto_gchar gchar* pref_dblog = prefs_get_string(PREF_DBLOG);
auto_char char* original_message = NULL;
int original_message_id = -1;

if (g_strcmp0(pref_dblog, "off") == 0) {
return;
Expand Down Expand Up @@ -407,9 +439,9 @@ _add_to_db(ProfMessage* message, char* type, const Jid* const from_jid, const Ji
type = (char*)_get_message_type_str(message->type);
}

// Check LMC validity (XEP-0308)
// Apply LMC and check its validity (XEP-0308)
if (message->replace_id) {
auto_sqlite char* replace_check_query = sqlite3_mprintf("SELECT `from_jid` FROM `ChatLogs` WHERE `stanza_id` = '%q'",
auto_sqlite char* replace_check_query = sqlite3_mprintf("SELECT `id`, `from_jid`, `replaces_db_id` FROM `ChatLogs` WHERE `stanza_id` = '%q' ORDER BY `timestamp` DESC LIMIT 1",
message->replace_id ? message->replace_id : "");

if (!replace_check_query) {
Expand All @@ -419,25 +451,34 @@ _add_to_db(ProfMessage* message, char* type, const Jid* const from_jid, const Ji

sqlite3_stmt* lmc_stmt = NULL;

if (SQLITE_OK == sqlite3_prepare_v2(g_chatlog_database, replace_check_query, -1, &lmc_stmt, NULL)) {
if (sqlite3_step(lmc_stmt) == SQLITE_ROW) {
const char* from_jid_orig = (const char*)sqlite3_column_text(lmc_stmt, 0);
if (SQLITE_OK != sqlite3_prepare_v2(g_chatlog_database, replace_check_query, -1, &lmc_stmt, NULL)) {
log_error("SQLite error in _add_to_db() on selecting original message: %s", sqlite3_errmsg(g_chatlog_database));
return;
}

if (sqlite3_step(lmc_stmt) == SQLITE_ROW) {
original_message_id = sqlite3_column_int(lmc_stmt, 0);
const char* from_jid_orig = (const char*)sqlite3_column_text(lmc_stmt, 1);

// Handle non-XEP-compliant replacement messages (edit->edit->original)
int tmp = sqlite3_column_int(lmc_stmt, 2);
original_message_id = tmp ? tmp : original_message_id;

if (g_strcmp0(from_jid_orig, from_jid->barejid) != 0) {
log_error("Mismatch in sender JIDs when trying to do LMC. Corrected message sender: %s. Original message sender: %s. Replace-ID: %s. Message: %s", from_jid->barejid, from_jid_orig, message->replace_id, message->plain);
cons_show_error("%s sent message correction with mismatched sender. See log for details.", from_jid->barejid);
sqlite3_finalize(lmc_stmt);
return;
}
if (g_strcmp0(from_jid_orig, from_jid->barejid) != 0) {
log_error("Mismatch in sender JIDs when trying to do LMC. Corrected message sender: %s. Original message sender: %s. Replace-ID: %s. Message: %s", from_jid->barejid, from_jid_orig, message->replace_id, message->plain);
cons_show_error("%s sent a message correction with mismatched sender. See log for details.", from_jid->barejid);
sqlite3_finalize(lmc_stmt);
return;
}
sqlite3_finalize(lmc_stmt);
} else {
log_warning("Got LMC message that does not have original message counterpart in the database from %s", message->from_jid->fulljid);
}
sqlite3_finalize(lmc_stmt);
}

// Check for duplicate messages
auto_sqlite char* duplicate_check_query = sqlite3_mprintf("SELECT 1 FROM `ChatLogs` WHERE (`archive_id` = '%q' AND `archive_id` != '') OR (`stanza_id` = '%q' AND `stanza_id` != '')",
message->stanzaid ? message->stanzaid : "",
message->id ? message->id : "");
auto_sqlite char* duplicate_check_query = sqlite3_mprintf("SELECT 1 FROM `ChatLogs` WHERE (`archive_id` = '%q' AND `archive_id` != '')",
message->stanzaid ? message->stanzaid : "");

if (!duplicate_check_query) {
log_error("Could not allocate memory for SQL duplicate query in log_database_add()");
Expand All @@ -455,12 +496,12 @@ _add_to_db(ProfMessage* message, char* type, const Jid* const from_jid, const Ji
}

if (duplicate_exists) {
log_warning("Duplicate stanza-id found for the message. stanza_id: %s; archive_id: %s; sender: %s; content: %s", message->id, message->stanzaid, from_jid->barejid, message->plain);
return;
log_error("Duplicate stanza-id found for the message. stanza_id: %s; archive_id: %s; sender: %s; content: %s", message->id, message->stanzaid, from_jid->barejid, message->plain);
cons_show_error("Got a message with duplicate server ID from %s.", from_jid->fulljid);
}

// Insert the message
auto_sqlite char* query = sqlite3_mprintf("INSERT INTO `ChatLogs` (`from_jid`, `from_resource`, `to_jid`, `to_resource`, `message`, `timestamp`, `stanza_id`, `archive_id`, `replace_id`, `type`, `encryption`) VALUES ('%q', '%q', '%q', '%q', '%q', '%q', '%q', '%q', '%q', '%q', '%q')",
auto_sqlite char* query = sqlite3_mprintf("INSERT INTO `ChatLogs` (`from_jid`, `from_resource`, `to_jid`, `to_resource`, `message`, `timestamp`, `stanza_id`, `archive_id`, `replaces_db_id`, `replaces_stanza_id`, `type`, `encryption`) VALUES ('%q', '%q', '%q', '%q', '%q', '%q', '%q', '%q', '%q', '%q', '%q', '%q')",
from_jid->barejid,
from_jid->resourcepart ? from_jid->resourcepart : "",
to_jid->barejid,
Expand All @@ -469,6 +510,7 @@ _add_to_db(ProfMessage* message, char* type, const Jid* const from_jid, const Ji
date_fmt ? date_fmt : "",
message->id ? message->id : "",
message->stanzaid ? message->stanzaid : "",
original_message_id == -1 ? "" : sqlite3_mprintf("%d", original_message_id),
message->replace_id ? message->replace_id : "",
type ? type : "",
enc ? enc : "");
Expand All @@ -491,5 +533,173 @@ _add_to_db(ProfMessage* message, char* type, const Jid* const from_jid, const Ji
if (inserted_rows_count < 1) {
log_error("SQLite did not insert message (rows: %d, id: %s, content: %s)", inserted_rows_count, message->id, message->plain);
}

sqlite3_int64 last_inserted_id = sqlite3_last_insert_rowid(g_chatlog_database);
if (last_inserted_id < 1) {
log_error("Message was inserted without error, but unable to get last inserted ID (got: %d)", last_inserted_id);
return;
}

if (message->replace_id) {
// TODO: maybe put as a trigger
auto_sqlite char* original_mark_query = sqlite3_mprintf("UPDATE `ChatLogs` SET `replaced_by_db_id`=%d, 'corrected'=1 WHERE `id` = %d",
last_inserted_id,
original_message_id);
if (!original_mark_query) {
log_error("Could not allocate memory for SQL replace old message query in log_database_add()");
return;
}
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, original_mark_query, NULL, 0, &err_msg)) {
log_error("Unable to replace old message: %s", err_msg);
sqlite3_free(err_msg);
return;
}
}
}
}

/**
 * Reads the schema version stored in the `DbVersion` table.
 *
 * Returns the value of the `version` column of the first row, or -1 when the
 * statement cannot be prepared (e.g. the table does not exist yet) or the
 * table yields no row.
 */
static int
_get_db_version(void)
{
    sqlite3_stmt* stmt = NULL;

    if (SQLITE_OK != sqlite3_prepare_v2(g_chatlog_database, "SELECT `version` FROM `DbVersion` LIMIT 1", -1, &stmt, NULL)) {
        return -1;
    }

    int version = -1;
    if (SQLITE_ROW == sqlite3_step(stmt)) {
        version = sqlite3_column_int(stmt, 0);
    }

    sqlite3_finalize(stmt);
    return version;
}

/**
* Migration to version 2 introduces new field, indexes timestamps, to_jid, from_jid. Returns TRUE on success.
*
* Changed fields:
* `replace_id` -> `replaces_stanza_id` stanza ID of the replaced message
* `replaces_db_id` database ID for correcting message of the original message
* `replaced_by_db_id` database ID for original message of the last correcting message
* `corrected` whether the message was corrected (replaced my LMC) or not
*/
static gboolean
_migrate_to_v2(void)
{
char* err_msg;

// Make ChatLogsV2 table which will be renamed to ChatLogs on transformation success
char* query = "CREATE TABLE `ChatLogsV2` (`id` INTEGER PRIMARY KEY AUTOINCREMENT, `from_jid` TEXT NOT NULL, `to_jid` TEXT NOT NULL, `from_resource` TEXT, `to_resource` TEXT, `message` TEXT, `timestamp` TEXT, `type` TEXT, `stanza_id` TEXT, `archive_id` TEXT, `encryption` TEXT, `marked_read` INTEGER, `corrected` INTEGER, `replaces_db_id` INTEGER, `replaces_stanza_id` TEXT, `replaced_by_db_id` INTEGER)";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
log_error("[DB Migration] Unable to create ChatLogsV2 table.");
return FALSE;
}

// Index timestamps since they are often used to sort messages.
query = "CREATE INDEX ChatLogs_timestamp_IDX ON `ChatLogsV2` (`timestamp`)";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
log_error("[DB Migration] Unable to create index for timestamp.");
goto cleanup;
}

// Index to_jid and from_jid as they are used to select messages from the current chat.
query = "CREATE INDEX ChatLogs_to_jid_IDX ON `ChatLogsV2` (`to_jid`)";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
log_error("[DB Migration] Unable to create index for to_jid.");
goto cleanup;
}
query = "CREATE INDEX ChatLogs_from_jid_IDX ON `ChatLogsV2` (`from_jid`)";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
log_error("[DB Migration] Unable to create index for from_jid.");
goto cleanup;
}

// Select all messages form V1 to V2 (and rename replace_id->replaces_stanza_id)
query = "INSERT INTO `ChatLogsV2` (`from_jid`, `to_jid`, `from_resource`, `to_resource`, `message`, `timestamp`, `type`, `stanza_id`, `archive_id`, `replaces_stanza_id`, `encryption`, `marked_read`) "
"SELECT `from_jid`, `to_jid`, `from_resource`, `to_resource`, `message`, `timestamp`, `type`, `stanza_id`, `archive_id`, `replace_id` AS `replaces_stanza_id`, `encryption`, `marked_read`"
"FROM `ChatLogs`;";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
log_error("[DB Migration] Unable to copy V2 table.");
goto cleanup;
}

// Set replaces_db_id
// Note: sender check is required due to #1898 (24d0030) since previous messages might be affected.
query = "UPDATE `ChatLogsV2` AS A "
"SET `replaces_db_id` = B.`id` "
"FROM `ChatLogs` AS B "
"WHERE A.`replaces_stanza_id` IS NOT NULL AND A.`replaces_stanza_id` != '' AND A.`replaces_stanza_id` = B.`stanza_id` "
"AND A.`from_jid` = B.`from_jid` AND A.`to_jid` = B.`to_jid`;";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
log_error("[DB Migration] Unable to add original messages to replacement messages in V2 table.");
goto cleanup;
}

// Rename original table so we can place ChatLogsV2 instead (TODO: we can just DROP here instead of rename+drop)
query = "ALTER TABLE ChatLogs RENAME TO ChatLogs_bck;";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
log_error("[DB Migration] Unable to rename old table.");
goto cleanup;
}

// TODO: drop old table. This part will be added prior to release to avoid potential original database loss by maintainers.
/*
query = "DROP TABLE ChatLogs;";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
log_error("[DB Migration] Unable to delete old DB backup.");
return FALSE;
}
*/

// Rename ChatLogsV2 to ChatLogs
query = "ALTER TABLE ChatLogsV2 RENAME TO ChatLogs;";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
log_error("[DB Migration] Unable to rename new table.");
return FALSE; // bad case, but unlikely to happen. Anyway, we don't want to drop it since it's the last copy
}

// Set new DB version (V2)
query = "UPDATE `DbVersion` SET `version` = 2";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
log_error("[DB Migration] Unable to update DB Version.");
return FALSE;
}

return TRUE;

cleanup:
if (err_msg) {
log_error("[DB Migration] SQLite error: %s", err_msg);
sqlite3_free(err_msg);
err_msg = NULL;
} else {
log_error("Unknown SQLite error.");
}

query = "DROP TABLE `ChatLogsV2`;";
if (SQLITE_OK != sqlite3_exec(g_chatlog_database, query, NULL, 0, &err_msg)) {
log_error("[DB Migration] Unable to drop ChatLogsV2 table: %s", err_msg);
sqlite3_free(err_msg);
}

return FALSE;
}

// Checks if the file system holding the database has at least as much free space
// as the database currently takes + 40% (for indexing and other potential size increases).
//
// path_to_db is the path of the database file; it is used both to measure the file
// and to find the file system it lives on.
// Returns TRUE when enough space is available, FALSE when there is not enough space
// or when the file size / free space cannot be determined.
static gboolean
_check_available_space_for_db_migration(char* path_to_db)
{
    struct stat file_stat;
    struct statvfs fs_stat;

    if (statvfs(path_to_db, &fs_stat) != 0 || stat(path_to_db, &file_stat) != 0) {
        log_error("Error checking available space.");
        return FALSE;
    }

    // Widen to 64 bit before multiplying: the statvfs fields may be 32-bit wide,
    // in which case block size * free blocks would wrap for large free space.
    unsigned long long file_size = (unsigned long long)file_stat.st_size / 1024;
    unsigned long long available_space_kb = (unsigned long long)fs_stat.f_frsize * (unsigned long long)fs_stat.f_bavail / 1024;
    log_debug("_check_available_space_for_db_migration(): Available space on disk: %llu KB; DB size: %llu KB", available_space_kb, file_size);

    // Required: current size + 40% of it.
    unsigned long long required_space_kb = file_size + (file_size * 4 / 10);

    return (available_space_kb >= required_space_kb);
}
Loading

0 comments on commit 29b6699

Please sign in to comment.