From 5b15d20b45407dc235c6fde8b2139f7b6b23eaaf Mon Sep 17 00:00:00 2001 From: Ayrat Hudaygulov Date: Fri, 13 Sep 2024 22:17:51 +0100 Subject: [PATCH 1/2] added test --- src/VahterBanBot.Tests/MLBanTests.fs | 13 +++++++++++++ src/VahterBanBot.Tests/TgMessageUtils.fs | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/VahterBanBot.Tests/MLBanTests.fs b/src/VahterBanBot.Tests/MLBanTests.fs index f766340..c1fa12c 100644 --- a/src/VahterBanBot.Tests/MLBanTests.fs +++ b/src/VahterBanBot.Tests/MLBanTests.fs @@ -223,6 +223,19 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = let! msgBanned = fixture.MessageBanned spam.Message Assert.False msgBanned } + + [<Fact>] + let ``Message with spam in photo caption also triggers auto-delete`` () = task { + // record a message, where 2 is in a training set as spam word + // but text is in a message.Caption + // ChatsToMonitor[0] doesn't have stopwords + let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = null, caption = "2") + let! _ = fixture.SendMessage msgUpdate + + // assert that the message got auto banned + let! 
msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message + Assert.True msgBanned + } interface IAssemblyFixture<VahterTestContainers> interface IClassFixture<MlAwaitFixture> diff --git a/src/VahterBanBot.Tests/TgMessageUtils.fs b/src/VahterBanBot.Tests/TgMessageUtils.fs index 743e299..6ab2921 100644 --- a/src/VahterBanBot.Tests/TgMessageUtils.fs +++ b/src/VahterBanBot.Tests/TgMessageUtils.fs @@ -32,7 +32,7 @@ type Tg() = ) ) - static member quickMsg (?text: string, ?chat: Chat, ?from: User, ?date: DateTime, ?callback: CallbackQuery) = + static member quickMsg (?text: string, ?chat: Chat, ?from: User, ?date: DateTime, ?callback: CallbackQuery, ?caption: string) = Update( Id = next(), Message = @@ -42,6 +42,7 @@ type Tg() = Chat = (chat |> Option.defaultValue (Tg.chat())), From = (from |> Option.defaultValue (Tg.user())), Date = (date |> Option.defaultValue DateTime.UtcNow), + Caption = (caption |> Option.defaultValue null), ReplyToMessage = null ) ) From 75be93951ce1e23d75386333602162124d5015b7 Mon Sep 17 00:00:00 2001 From: Ayrat Hudaygulov Date: Fri, 13 Sep 2024 22:20:06 +0100 Subject: [PATCH 2/2] ML now triggers on caption if text is missing --- src/VahterBanBot.Tests/MLBanTests.fs | 4 ++-- src/VahterBanBot/Bot.fs | 10 +++++----- src/VahterBanBot/Types.fs | 4 ++-- src/VahterBanBot/Utils.fs | 7 +++++++ 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/VahterBanBot.Tests/MLBanTests.fs b/src/VahterBanBot.Tests/MLBanTests.fs index c1fa12c..0ab5909 100644 --- a/src/VahterBanBot.Tests/MLBanTests.fs +++ b/src/VahterBanBot.Tests/MLBanTests.fs @@ -226,10 +226,10 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = [<Fact>] let ``Message with spam in photo caption also triggers auto-delete`` () = task { - // record a message, where 2 is in a training set as spam word + // record a message, where 2222222 is in a training set as spam word // but text is in a message.Caption // ChatsToMonitor[0] doesn't have stopwords - let msgUpdate = Tg.quickMsg(chat = 
fixture.ChatsToMonitor[0], text = null, caption = "2") + let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = null, caption = "2222222") let! _ = fixture.SendMessage msgUpdate // assert that the message got auto banned diff --git a/src/VahterBanBot/Bot.fs b/src/VahterBanBot/Bot.fs index 128c575..2cf3369 100644 --- a/src/VahterBanBot/Bot.fs +++ b/src/VahterBanBot/Bot.fs @@ -424,7 +424,7 @@ let killSpammerAutomated |> DB.banUserByBot let msgType = if deleteMessage then "Deleted" else "Detected" - let logMsg = $"{msgType} spam (score: {score}) in {prependUsername message.Chat.Username} ({message.Chat.Id}) from {prependUsername message.From.Username} ({message.From.Id}) with text:\n{message.Text}" + let logMsg = $"{msgType} spam (score: {score}) in {prependUsername message.Chat.Username} ({message.Chat.Id}) from {prependUsername message.From.Username} ({message.From.Id}) with text:\n{message.TextOrCaption}" let! replyMarkup = task { if deleteMessage then @@ -493,7 +493,7 @@ let justMessage do! botClient.DeleteMessageAsync(ChatId(message.Chat.Id), message.MessageId) |> safeTaskAwait (fun e -> logger.LogError ($"Failed to delete message {message.MessageId} from chat {message.Chat.Id}", e)) - elif botConfig.MlEnabled && message.Text <> null then + elif botConfig.MlEnabled && message.TextOrCaption <> null then use mlActivity = botActivity.StartActivity("mlPrediction") let shouldBeSkipped = @@ -506,14 +506,14 @@ let justMessage match botConfig.MlStopWordsInChats.TryGetValue message.Chat.Id with | true, stopWords -> stopWords - |> Seq.exists (fun sw -> message.Text.Contains(sw, StringComparison.OrdinalIgnoreCase)) + |> Seq.exists (fun sw -> message.TextOrCaption.Contains(sw, StringComparison.OrdinalIgnoreCase)) | _ -> false %mlActivity.SetTag("skipPrediction", shouldBeSkipped) if not shouldBeSkipped then let! 
usrMsgCount = DB.countUniqueUserMsg message.From.Id - match ml.Predict(message.Text, usrMsgCount) with + match ml.Predict(message.TextOrCaption, usrMsgCount) with | Some prediction -> %mlActivity.SetTag("spamScoreMl", prediction.Score) @@ -667,7 +667,7 @@ let vahterMarkedAsNotSpam let vahterUsername = vahter.username |> Option.defaultValue null - let logMsg = $"Vahter {prependUsername vahterUsername} ({vahter.id}) marked message {msgId} in {prependUsername chatName}({chatId}) as false-positive (NOT A SPAM)\n{msg.message.Text}" + let logMsg = $"Vahter {prependUsername vahterUsername} ({vahter.id}) marked message {msgId} in {prependUsername chatName}({chatId}) as false-positive (NOT A SPAM)\n{msg.message.TextOrCaption}" do! botClient.SendTextMessageAsync(ChatId(botConfig.LogsChannelId), logMsg) |> taskIgnore logger.LogInformation logMsg } diff --git a/src/VahterBanBot/Types.fs b/src/VahterBanBot/Types.fs index 788e70d..afa79e4 100644 --- a/src/VahterBanBot/Types.fs +++ b/src/VahterBanBot/Types.fs @@ -72,7 +72,7 @@ module DbBanned = if isNull message.From || isNull message.Chat then failwith "Message should have a user and a chat" { message_id = Some message.MessageId - message_text = message.Text + message_text = message.TextOrCaption banned_user_id = message.From.Id banned_at = DateTime.UtcNow banned_in_chat_id = Some message.Chat.Id @@ -92,7 +92,7 @@ type DbMessage = message_id = message.MessageId user_id = message.From.Id created_at = DateTime.UtcNow - text = message.Text + text = message.TextOrCaption raw_message = JsonConvert.SerializeObject message } [] diff --git a/src/VahterBanBot/Utils.fs b/src/VahterBanBot/Utils.fs index 6178d4c..a2de58e 100644 --- a/src/VahterBanBot/Utils.fs +++ b/src/VahterBanBot/Utils.fs @@ -56,3 +56,10 @@ type Task<'x> with member this.Ignore() = task { let! 
_ = this in () }
 
 let inline taskIgnore (t: Task<'x>) = t.Ignore()
+
+type Telegram.Bot.Types.Message with
+    /// Text of the message, or its Caption when Text is null.
+    member msg.TextOrCaption =
+        match msg.Text with
+        | null -> msg.Caption
+        | text -> text