From 5183a77752c558b2d3d6ec62b0d427fd6c928111 Mon Sep 17 00:00:00 2001 From: Ayrat Hudaygulov Date: Wed, 7 Aug 2024 22:10:10 +0100 Subject: [PATCH] added real auto-ban with social score system --- .env.example | 5 + src/VahterBanBot.Tests/ContainerTestBase.fs | 7 +- src/VahterBanBot.Tests/MLBanTests.fs | 114 ++++++++++++++++++-- src/VahterBanBot/Bot.fs | 45 +++++++- src/VahterBanBot/DB.fs | 57 ++++++++-- src/VahterBanBot/Program.fs | 17 ++- src/VahterBanBot/Types.fs | 10 ++ 7 files changed, 229 insertions(+), 26 deletions(-) diff --git a/.env.example b/.env.example index 0406b69..25df51d 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,8 @@ BOT_TELEGRAM_TOKEN=SECRET_FROM_TELEGRAM BOT_AUTH_TOKEN=JUST_YOUR_SECRET BOT_HOOK_ROUTE=/bot +BOT_USER_ID=123456789 +BOT_USER_NAME=your_bot ASPNETCORE_URLS=http://+:88 DEBUG=true LOGS_CHANNEL_ID=-1000000000000 @@ -19,6 +21,9 @@ ML_ENABLED=false ML_RETRAIN_INTERVAL_SEC=86400 ML_SEED= ML_SPAM_DELETION_ENABLED=false +ML_SPAM_AUTOBAN_ENABLED=true +ML_SPAM_AUTOBAN_SCORE_THRESHOLD=-5.0 +ML_SPAM_AUTOBAN_CHECK_LAST_MSG_COUNT=10 ML_TRAIN_INTERVAL_DAYS=30 ML_TRAIN_CRITICAL_MSG_COUNT=5 ML_TRAINING_SET_FRACTION=0.2 diff --git a/src/VahterBanBot.Tests/ContainerTestBase.fs b/src/VahterBanBot.Tests/ContainerTestBase.fs index 4e2e1de..0f03b57 100644 --- a/src/VahterBanBot.Tests/ContainerTestBase.fs +++ b/src/VahterBanBot.Tests/ContainerTestBase.fs @@ -81,6 +81,8 @@ type VahterTestContainers() = .WithImage(image) .WithNetwork(network) .WithPortBinding(80, true) + .WithEnvironment("BOT_USER_ID", "1337") + .WithEnvironment("BOT_USER_NAME", "test_bot") .WithEnvironment("BOT_TELEGRAM_TOKEN", "TELEGRAM_SECRET") .WithEnvironment("BOT_AUTH_TOKEN", "OUR_SECRET") .WithEnvironment("LOGS_CHANNEL_ID", "-123") @@ -97,6 +99,9 @@ type VahterTestContainers() = .WithEnvironment("ML_SPAM_DELETION_ENABLED", "true") .WithEnvironment("ML_SPAM_THRESHOLD", "1.0") .WithEnvironment("ML_STOP_WORDS_IN_CHATS", """{"-42":["2"]}""") + 
.WithEnvironment("ML_SPAM_AUTOBAN_ENABLED", "true") + .WithEnvironment("ML_SPAM_AUTOBAN_CHECK_LAST_MSG_COUNT", "10") + .WithEnvironment("ML_SPAM_AUTOBAN_SCORE_THRESHOLD", "-4.0") // .net 8.0 upgrade has a breaking change // https://learn.microsoft.com/en-us/dotnet/core/compatibility/containers/8.0/aspnet-port // Azure default port for containers is 80, se we need explicitly set it @@ -203,7 +208,7 @@ type VahterTestContainers() = return count > 0 } - member _.MessageIsAutoBanned(msg: Message) = task { + member _.MessageIsAutoDeleted(msg: Message) = task { use conn = new NpgsqlConnection(publicConnectionString) //language=postgresql let sql = "SELECT COUNT(*) FROM banned_by_bot WHERE banned_in_chat_id = @chatId AND message_id = @messageId" diff --git a/src/VahterBanBot.Tests/MLBanTests.fs b/src/VahterBanBot.Tests/MLBanTests.fs index 37dd675..f766340 100644 --- a/src/VahterBanBot.Tests/MLBanTests.fs +++ b/src/VahterBanBot.Tests/MLBanTests.fs @@ -16,7 +16,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = let! _ = fixture.SendMessage msgUpdate // assert that the message got auto banned - let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message Assert.True msgBanned } @@ -29,7 +29,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = let! _ = fixture.SendMessage msgUpdate // assert that the message got auto banned - let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message Assert.False msgBanned } @@ -42,7 +42,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = let! _ = fixture.SendMessage msgUpdate // assert that the message got auto banned - let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + let! 
msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message Assert.False msgBanned } @@ -54,7 +54,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = let! _ = fixture.SendMessage msgUpdate // assert that the message got auto banned - let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message Assert.False msgBanned } @@ -65,7 +65,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = let! _ = fixture.SendMessage msgUpdate // assert that the message got auto banned - let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message Assert.False msgBanned } @@ -76,7 +76,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = let! _ = fixture.SendMessage msgUpdate // assert that the message got auto banned - let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message Assert.True msgBanned } @@ -88,7 +88,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = let! _ = fixture.SendMessage msgUpdate // assert that the message got auto banned - let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message Assert.True msgBanned // assert it is not false-positive let! isFalsePositive = fixture.IsMessageFalsePositive msgUpdate.Message @@ -112,7 +112,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = let! _ = fixture.SendMessage msgUpdate // assert that the message got auto banned - let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message Assert.True msgBanned // send a callback to mark it as false-positive @@ -125,6 +125,104 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = let! 
isFalsePositive = fixture.IsMessageFalsePositive msgUpdate.Message Assert.False isFalsePositive } + + [] + let ``User will be autobanned after consecutive spam`` () = task { + // record a message, where 2 is in a training set as spam word + // ChatsToMonitor[0] doesn't have stopwords + let user = Tg.user() + let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "66666666", from = user) + + // 1 - no ban + let! _ = fixture.SendMessage msgUpdate + let! msgBanned = fixture.MessageBanned msgUpdate.Message + Assert.False msgBanned + + // 2 - no ban + let! _ = fixture.SendMessage msgUpdate + let! msgBanned = fixture.MessageBanned msgUpdate.Message + Assert.False msgBanned + + // 3 - no ban + let! _ = fixture.SendMessage msgUpdate + let! msgBanned = fixture.MessageBanned msgUpdate.Message + Assert.False msgBanned + + // 4 - ban (depends on the ML_SPAM_AUTOBAN_SCORE_THRESHOLD) + let! _ = fixture.SendMessage msgUpdate + let! msgBanned = fixture.MessageBanned msgUpdate.Message + Assert.True msgBanned + } + + [] + let ``User can recover from autoban by sending good messages`` () = task { + // record a message, where 2 is in a training set as spam word + // ChatsToMonitor[0] doesn't have stopwords + let user = Tg.user() + let spam = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "66666666", from = user) + let notSpam = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "b", from = user) + + // 1 - no ban + let! _ = fixture.SendMessage spam + let! msgBanned = fixture.MessageBanned spam.Message + Assert.False msgBanned + + // 1.5 - no ban + let! _ = fixture.SendMessage notSpam + let! msgBanned = fixture.MessageBanned notSpam.Message + Assert.False msgBanned + + // 2 - no ban + let! _ = fixture.SendMessage spam + let! msgBanned = fixture.MessageBanned spam.Message + Assert.False msgBanned + + // 3 - no ban + let! _ = fixture.SendMessage spam + let! 
msgBanned = fixture.MessageBanned spam.Message + Assert.False msgBanned + + // 4 - no ban (as user posted 1 good message in between) + let! _ = fixture.SendMessage spam + let! msgBanned = fixture.MessageBanned spam.Message + Assert.False msgBanned + } + + [] + let ``User can be saved from auto ban by vahter marking it false-positive`` () = task { + // record a message, where 777777777777777777 is in a training set as spam word + // ChatsToMonitor[0] doesn't have stopwords + let user = Tg.user() + let spam = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "777777777777777777", from = user) + + // 1 - no ban + let! _ = fixture.SendMessage spam + let! msgBanned = fixture.MessageBanned spam.Message + let! msgDeleted = fixture.MessageIsAutoDeleted spam.Message + Assert.True msgDeleted + Assert.False msgBanned + + // 1.5 - vahter marked as false-positive via button + // send a callback to mark it as false-positive + let! callbackId = fixture.GetCallbackId spam.Message "NotASpam" + let msgCallback = Tg.callback(string callbackId, from = fixture.Vahters[0]) + let! _ = fixture.SendMessage msgCallback + + // 2 - no ban + let! _ = fixture.SendMessage spam + let! msgBanned = fixture.MessageBanned spam.Message + Assert.False msgBanned + + // 3 - no ban + let! _ = fixture.SendMessage spam + let! msgBanned = fixture.MessageBanned spam.Message + Assert.False msgBanned + + // 4 - no ban (as vahter marked this as false positive) + let! _ = fixture.SendMessage spam + let! 
msgBanned = fixture.MessageBanned spam.Message + Assert.False msgBanned + } interface IAssemblyFixture interface IClassFixture diff --git a/src/VahterBanBot/Bot.fs b/src/VahterBanBot/Bot.fs index fbd5626..3b6567d 100644 --- a/src/VahterBanBot/Bot.fs +++ b/src/VahterBanBot/Bot.fs @@ -447,7 +447,29 @@ let killSpammerAutomated logger.LogInformation logMsg } +let autoBan + (botUser: DbUser) + (botClient: ITelegramBotClient) + (botConfig: BotConfiguration) + (message: Message) + (logger: ILogger) = task { + use banOnReplyActivity = botActivity.StartActivity("autoBan") + %banOnReplyActivity + .SetTag("spammerId", message.From.Id) + .SetTag("spammerUsername", message.From.Username) + + let! userStats = DB.getUserStatsByLastNMessages botConfig.MlSpamAutobanCheckLastMsgCount message.From.Id + let socialScore = userStats.good - userStats.bad + + %banOnReplyActivity.SetTag("socialScore", socialScore) + + if double socialScore <= botConfig.MlSpamAutobanScoreThreshold then + // ban user in all monitored chats + do! totalBan botClient botConfig message botUser logger +} + let justMessage + (botUser: DbUser) (botClient: ITelegramBotClient) (botConfig: BotConfiguration) (logger: ILogger) @@ -460,7 +482,16 @@ let justMessage .SetTag("fromUserId", message.From.Id) .SetTag("fromUsername", message.From.Username) - if botConfig.MlEnabled && message.Text <> null then + // check if user got auto-banned already + // that could happen due to the race condition between spammers mass messages + // and the bot's processing queue + let! isAutoBanned = DB.isBannedByVahter botUser.id message.From.Id + if isAutoBanned then + // just delete message and move on + do! 
botClient.DeleteMessageAsync(ChatId(message.Chat.Id), message.MessageId) + |> safeTaskAwait (fun e -> logger.LogError ($"Failed to delete message {message.MessageId} from chat {message.Chat.Id}", e)) + + elif botConfig.MlEnabled && message.Text <> null then use mlActivity = botActivity.StartActivity("mlPrediction") let shouldBeSkipped = @@ -487,6 +518,10 @@ let justMessage if prediction.Score >= botConfig.MlSpamThreshold then // delete message do! killSpammerAutomated botClient botConfig message logger botConfig.MlSpamDeletionEnabled prediction.Score + + if botConfig.MlSpamAutobanEnabled then + // trigger auto-ban check + do! autoBan botUser botClient botConfig message logger elif prediction.Score >= botConfig.MlWarningThreshold then // just warn do! killSpammerAutomated botClient botConfig message logger false prediction.Score @@ -496,7 +531,7 @@ let justMessage | None -> // no prediction (error or not ready yet) () - + do! message |> DbMessage.newMessage @@ -570,6 +605,7 @@ let adminCommand } let onMessage + (botUser: DbUser) (botClient: ITelegramBotClient) (botConfig: BotConfiguration) (logger: ILogger) @@ -606,7 +642,7 @@ let onMessage // if message is not a command from authorized user, just save it ID to DB else - do! justMessage botClient botConfig logger ml message + do! justMessage botUser botClient botConfig logger ml message } let vahterMarkedAsNotSpam @@ -698,6 +734,7 @@ let onCallback } let onUpdate + (botUser: DbUser) (botClient: ITelegramBotClient) (botConfig: BotConfiguration) (logger: ILogger) @@ -707,5 +744,5 @@ let onUpdate if update.CallbackQuery <> null then do! onCallback botClient botConfig logger update.CallbackQuery else - do! onMessage botClient botConfig logger ml update.Message + do! 
onMessage botUser botClient botConfig logger ml update.Message } diff --git a/src/VahterBanBot/DB.fs b/src/VahterBanBot/DB.fs index 4530e91..d8c4e23 100644 --- a/src/VahterBanBot/DB.fs +++ b/src/VahterBanBot/DB.fs @@ -106,16 +106,6 @@ let getUserMessages (userId: int64): Task = return Array.ofSeq messages } -let deleteMsgs (msg: DbMessage[]): Task = - task { - let msgIds = msg |> Array.map (_.message_id) - use conn = new NpgsqlConnection(connString) - - //language=postgresql - let sql = "DELETE FROM message WHERE message_id = ANY(@msgIds)" - return! conn.ExecuteAsync(sql, {| msgIds = msgIds |}) - } - let cleanupOldMessages (howOld: TimeSpan): Task = task { use conn = new NpgsqlConnection(connString) @@ -315,3 +305,50 @@ let countUniqueUserMsg (userId: int64): Task = let! result = conn.QuerySingleAsync(sql, {| userId = userId |}) return result } + +let isBannedByVahter (vahterId: int64) (userId: int64): Task = + task { + use conn = new NpgsqlConnection(connString) + + //language=postgresql + let sql = "SELECT EXISTS(SELECT 1 FROM banned WHERE banned_user_id = @userId AND banned_by = @vahterId)" + + let! 
result = conn.QuerySingleAsync(sql, {| userId = userId; vahterId = vahterId |}) + return result + } + +let getUserStatsByLastNMessages (n: int) (userId: int64): Task = + task { + use conn = new NpgsqlConnection(connString) + + //language=postgresql + let sql = + """ +WITH stats AS (SELECT m.message_id, + m.chat_id, + b.id IS NOT NULL AS banned, + bbb.id IS NOT NULL AS banned_by_bot, + fnm.chat_id IS NOT NULL AS false_neg, + fpm.text IS NOT NULL AS false_pos + FROM message m + LEFT JOIN banned b ON m.message_id = b.message_id AND m.chat_id = b.banned_in_chat_id + LEFT JOIN public.banned_by_bot bbb + ON m.message_id = bbb.message_id AND m.chat_id = bbb.banned_in_chat_id + LEFT JOIN public.false_negative_messages fnm + ON m.message_id = fnm.message_id AND m.chat_id = fnm.chat_id + LEFT JOIN false_positive_messages fpm ON m.text = fpm.text + WHERE m.user_id = @userId + ORDER BY m.created_at DESC + LIMIT @n), + stats_count AS (SELECT message_id, + chat_id, + CASE WHEN false_pos THEN FALSE ELSE banned OR banned_by_bot OR false_neg END AS spam + FROM stats) +SELECT COUNT(*) FILTER (WHERE NOT spam) AS good, + COUNT(*) FILTER (WHERE spam) AS bad +FROM stats_count; + """ + + let! 
result = conn.QuerySingleAsync(sql, {| userId = userId; n = n |}) + return result + } diff --git a/src/VahterBanBot/Program.fs b/src/VahterBanBot/Program.fs index 2539a99..d0bb972 100644 --- a/src/VahterBanBot/Program.fs +++ b/src/VahterBanBot/Program.fs @@ -16,6 +16,7 @@ open Telegram.Bot.Types open Giraffe open Microsoft.Extensions.DependencyInjection open Telegram.Bot.Types.Enums +open VahterBanBot open VahterBanBot.Cleanup open VahterBanBot.ML open VahterBanBot.Utils @@ -39,6 +40,8 @@ let botConf = { BotToken = getEnv "BOT_TELEGRAM_TOKEN" Route = getEnvOr "BOT_HOOK_ROUTE" "/bot" SecretToken = getEnv "BOT_AUTH_TOKEN" + BotUserId = getEnv "BOT_USER_ID" |> int64 + BotUserName = getEnv "BOT_USER_NAME" LogsChannelId = getEnv "LOGS_CHANNEL_ID" |> int64 ChatsToMonitor = getEnv "CHATS_TO_MONITOR" |> JsonConvert.DeserializeObject<_> AllowedUsers = getEnv "ALLOWED_USERS" |> JsonConvert.DeserializeObject<_> @@ -55,6 +58,9 @@ let botConf = MlRetrainInterval = getEnvOrWith "ML_RETRAIN_INTERVAL_SEC" None (int >> TimeSpan.FromSeconds >> Some) MlSeed = getEnvOrWith "ML_SEED" (Nullable()) (int >> Nullable) MlSpamDeletionEnabled = getEnvOr "ML_SPAM_DELETION_ENABLED" "false" |> bool.Parse + MlSpamAutobanEnabled = getEnvOr "ML_SPAM_AUTOBAN_ENABLED" "false" |> bool.Parse + MlSpamAutobanCheckLastMsgCount = getEnvOr "ML_SPAM_AUTOBAN_CHECK_LAST_MSG_COUNT" "10" |> int + MlSpamAutobanScoreThreshold = getEnvOr "ML_SPAM_AUTOBAN_SCORE_THRESHOLD" "-5.0" |> double MlTrainInterval = getEnvOr "ML_TRAIN_INTERVAL_DAYS" "30" |> int |> TimeSpan.FromDays MlTrainCriticalMsgCount = getEnvOr "ML_TRAIN_CRITICAL_MSG_COUNT" "5" |> int MlTrainingSetFraction = getEnvOr "ML_TRAINING_SET_FRACTION" "0.2" |> float @@ -134,7 +140,12 @@ getEnvWith "APPLICATIONINSIGHTS_CONNECTION_STRING" (fun appInsightKey -> ) %builder.Logging.AddConsole() - + +let botUser = + DbUser.newUser(botConf.BotUserId, botConf.BotUserName) + |> DB.upsertUser + |> fun x -> x.Result + let webApp = choose [ // need for Azure health checks 
on any route GET >=> text "OK" @@ -154,7 +165,7 @@ let webApp = choose [ let ml = scope.ServiceProvider.GetRequiredService() let logger = ctx.GetLogger() try - do! onUpdate telegramClient botConf (ctx.GetLogger "VahterBanBot.Bot") ml update + do! onUpdate botUser telegramClient botConf (ctx.GetLogger "VahterBanBot.Bot") ml update %topActivity.SetTag("update-error", false) with e -> logger.LogError(e, $"Unexpected error while processing update: {updateBodyJson}") @@ -181,7 +192,7 @@ if botConf.UsePolling then let logger = ctx.ServiceProvider.GetRequiredService>() let client = ctx.ServiceProvider.GetRequiredService() let ml = ctx.ServiceProvider.GetRequiredService() - do! onUpdate client botConf logger ml update + do! onUpdate botUser client botConf logger ml update } member x.HandlePollingErrorAsync (botClient: ITelegramBotClient, ex: Exception, cancellationToken: CancellationToken) = Task.CompletedTask diff --git a/src/VahterBanBot/Types.fs b/src/VahterBanBot/Types.fs index 7776a53..bd5f8dc 100644 --- a/src/VahterBanBot/Types.fs +++ b/src/VahterBanBot/Types.fs @@ -13,6 +13,8 @@ type BotConfiguration = { BotToken: string Route: string SecretToken: string + BotUserId: int64 + BotUserName: string LogsChannelId: int64 ChatsToMonitor: Dictionary AllowedUsers: Dictionary @@ -29,6 +31,9 @@ type BotConfiguration = MlRetrainInterval: TimeSpan option MlSeed: Nullable MlSpamDeletionEnabled: bool + MlSpamAutobanEnabled: bool + MlSpamAutobanCheckLastMsgCount: int + MlSpamAutobanScoreThreshold: double MlTrainInterval: TimeSpan MlTrainCriticalMsgCount: int MlTrainingSetFraction: float @@ -145,3 +150,8 @@ type CallbackMessageTypeHandler() = parameter.Value <- JsonConvert.SerializeObject value override this.Parse(value) = JsonConvert.DeserializeObject(value.ToString()) + +[] +type UserStats = + { good: int + bad: int }