Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Now ML triggers on caption as well #54

Merged
merged 2 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/VahterBanBot.Tests/MLBanTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,19 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! msgBanned = fixture.MessageBanned spam.Message
Assert.False msgBanned
}

[<Fact>]
let ``Message with spam in photo caption also triggers auto-delete`` () = task {
    // "2222222" is a spam word in the training set. Here it is carried only by
    // message.Caption (Text is null), and ChatsToMonitor[0] has no stopwords.
    let spamCaption = "2222222"
    let captionOnlyUpdate =
        Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = null, caption = spamCaption)
    let! _ = fixture.SendMessage captionOnlyUpdate

    // the caption-only message must be reported as auto-deleted
    let! wasAutoDeleted = fixture.MessageIsAutoDeleted captionOnlyUpdate.Message
    Assert.True wasAutoDeleted
}

interface IAssemblyFixture<VahterTestContainers>
interface IClassFixture<MlAwaitFixture>
3 changes: 2 additions & 1 deletion src/VahterBanBot.Tests/TgMessageUtils.fs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type Tg() =
)
)

static member quickMsg (?text: string, ?chat: Chat, ?from: User, ?date: DateTime, ?callback: CallbackQuery) =
static member quickMsg (?text: string, ?chat: Chat, ?from: User, ?date: DateTime, ?callback: CallbackQuery, ?caption: string) =
Update(
Id = next(),
Message =
Expand All @@ -42,6 +42,7 @@ type Tg() =
Chat = (chat |> Option.defaultValue (Tg.chat())),
From = (from |> Option.defaultValue (Tg.user())),
Date = (date |> Option.defaultValue DateTime.UtcNow),
Caption = (caption |> Option.defaultValue null),
ReplyToMessage = null
)
)
Expand Down
10 changes: 5 additions & 5 deletions src/VahterBanBot/Bot.fs
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ let killSpammerAutomated
|> DB.banUserByBot

let msgType = if deleteMessage then "Deleted" else "Detected"
let logMsg = $"{msgType} spam (score: {score}) in {prependUsername message.Chat.Username} ({message.Chat.Id}) from {prependUsername message.From.Username} ({message.From.Id}) with text:\n{message.Text}"
let logMsg = $"{msgType} spam (score: {score}) in {prependUsername message.Chat.Username} ({message.Chat.Id}) from {prependUsername message.From.Username} ({message.From.Id}) with text:\n{message.TextOrCaption}"

let! replyMarkup = task {
if deleteMessage then
Expand Down Expand Up @@ -493,7 +493,7 @@ let justMessage
do! botClient.DeleteMessageAsync(ChatId(message.Chat.Id), message.MessageId)
|> safeTaskAwait (fun e -> logger.LogError ($"Failed to delete message {message.MessageId} from chat {message.Chat.Id}", e))

elif botConfig.MlEnabled && message.Text <> null then
elif botConfig.MlEnabled && message.TextOrCaption <> null then
use mlActivity = botActivity.StartActivity("mlPrediction")

let shouldBeSkipped =
Expand All @@ -506,14 +506,14 @@ let justMessage
match botConfig.MlStopWordsInChats.TryGetValue message.Chat.Id with
| true, stopWords ->
stopWords
|> Seq.exists (fun sw -> message.Text.Contains(sw, StringComparison.OrdinalIgnoreCase))
|> Seq.exists (fun sw -> message.TextOrCaption.Contains(sw, StringComparison.OrdinalIgnoreCase))
| _ -> false
%mlActivity.SetTag("skipPrediction", shouldBeSkipped)

if not shouldBeSkipped then
let! usrMsgCount = DB.countUniqueUserMsg message.From.Id

match ml.Predict(message.Text, usrMsgCount) with
match ml.Predict(message.TextOrCaption, usrMsgCount) with
| Some prediction ->
%mlActivity.SetTag("spamScoreMl", prediction.Score)

Expand Down Expand Up @@ -667,7 +667,7 @@ let vahterMarkedAsNotSpam

let vahterUsername = vahter.username |> Option.defaultValue null

let logMsg = $"Vahter {prependUsername vahterUsername} ({vahter.id}) marked message {msgId} in {prependUsername chatName}({chatId}) as false-positive (NOT A SPAM)\n{msg.message.Text}"
let logMsg = $"Vahter {prependUsername vahterUsername} ({vahter.id}) marked message {msgId} in {prependUsername chatName}({chatId}) as false-positive (NOT A SPAM)\n{msg.message.TextOrCaption}"
do! botClient.SendTextMessageAsync(ChatId(botConfig.LogsChannelId), logMsg) |> taskIgnore
logger.LogInformation logMsg
}
Expand Down
4 changes: 2 additions & 2 deletions src/VahterBanBot/Types.fs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ module DbBanned =
if isNull message.From || isNull message.Chat then
failwith "Message should have a user and a chat"
{ message_id = Some message.MessageId
message_text = message.Text
message_text = message.TextOrCaption
banned_user_id = message.From.Id
banned_at = DateTime.UtcNow
banned_in_chat_id = Some message.Chat.Id
Expand All @@ -92,7 +92,7 @@ type DbMessage =
message_id = message.MessageId
user_id = message.From.Id
created_at = DateTime.UtcNow
text = message.Text
text = message.TextOrCaption
raw_message = JsonConvert.SerializeObject message }

[<CLIMutable>]
Expand Down
7 changes: 7 additions & 0 deletions src/VahterBanBot/Utils.fs
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,10 @@ type Task<'x> with
/// Awaits this task, discards its result, and completes with unit.
member this.Ignore() = task { let! _ = this in () }

/// Function form of the Ignore() extension on Task<'x>: returns t.Ignore(),
/// which makes it convenient at the end of a `|>` pipeline.
let inline taskIgnore (t: Task<'x>) = t.Ignore()

type Telegram.Bot.Types.Message with
    /// The message's Text, or its Caption when Text is null.
    /// The result is still null when both Text and Caption are null.
    member msg.TextOrCaption =
        match msg.Text with
        | null -> msg.Caption
        | text -> text