Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added real auto-ban with social score system #47

Merged
merged 1 commit into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
BOT_TELEGRAM_TOKEN=SECRET_FROM_TELEGRAM
BOT_AUTH_TOKEN=JUST_YOUR_SECRET
BOT_HOOK_ROUTE=/bot
BOT_USER_ID=123456789
BOT_USER_NAME=your_bot
ASPNETCORE_URLS=http://+:88
DEBUG=true
LOGS_CHANNEL_ID=-1000000000000
Expand All @@ -19,6 +21,9 @@ ML_ENABLED=false
ML_RETRAIN_INTERVAL_SEC=86400
ML_SEED=
ML_SPAM_DELETION_ENABLED=false
ML_SPAM_AUTOBAN_ENABLED=true
ML_SPAM_AUTOBAN_SCORE_THRESHOLD=-5.0
ML_SPAM_AUTOBAN_CHECK_LAST_MSG_COUNT=10
ML_TRAIN_INTERVAL_DAYS=30
ML_TRAIN_CRITICAL_MSG_COUNT=5
ML_TRAINING_SET_FRACTION=0.2
Expand Down
7 changes: 6 additions & 1 deletion src/VahterBanBot.Tests/ContainerTestBase.fs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ type VahterTestContainers() =
.WithImage(image)
.WithNetwork(network)
.WithPortBinding(80, true)
.WithEnvironment("BOT_USER_ID", "1337")
.WithEnvironment("BOT_USER_NAME", "test_bot")
.WithEnvironment("BOT_TELEGRAM_TOKEN", "TELEGRAM_SECRET")
.WithEnvironment("BOT_AUTH_TOKEN", "OUR_SECRET")
.WithEnvironment("LOGS_CHANNEL_ID", "-123")
Expand All @@ -97,6 +99,9 @@ type VahterTestContainers() =
.WithEnvironment("ML_SPAM_DELETION_ENABLED", "true")
.WithEnvironment("ML_SPAM_THRESHOLD", "1.0")
.WithEnvironment("ML_STOP_WORDS_IN_CHATS", """{"-42":["2"]}""")
.WithEnvironment("ML_SPAM_AUTOBAN_ENABLED", "true")
.WithEnvironment("ML_SPAM_AUTOBAN_CHECK_LAST_MSG_COUNT", "10")
.WithEnvironment("ML_SPAM_AUTOBAN_SCORE_THRESHOLD", "-4.0")
// .net 8.0 upgrade has a breaking change
// https://learn.microsoft.com/en-us/dotnet/core/compatibility/containers/8.0/aspnet-port
// Azure default port for containers is 80, so we need to set it explicitly
Expand Down Expand Up @@ -203,7 +208,7 @@ type VahterTestContainers() =
return count > 0
}

member _.MessageIsAutoBanned(msg: Message) = task {
member _.MessageIsAutoDeleted(msg: Message) = task {
use conn = new NpgsqlConnection(publicConnectionString)
//language=postgresql
let sql = "SELECT COUNT(*) FROM banned_by_bot WHERE banned_in_chat_id = @chatId AND message_id = @messageId"
Expand Down
114 changes: 106 additions & 8 deletions src/VahterBanBot.Tests/MLBanTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// check whether the message got auto-deleted by the bot
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.True msgBanned
}

Expand All @@ -29,7 +29,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// check whether the message got auto-deleted by the bot
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.False msgBanned
}

Expand All @@ -42,7 +42,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// check whether the message got auto-deleted by the bot
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.False msgBanned
}

Expand All @@ -54,7 +54,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// check whether the message got auto-deleted by the bot
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.False msgBanned
}

Expand All @@ -65,7 +65,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// check whether the message got auto-deleted by the bot
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.False msgBanned
}

Expand All @@ -76,7 +76,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// check whether the message got auto-deleted by the bot
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.True msgBanned
}

Expand All @@ -88,7 +88,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// check whether the message got auto-deleted by the bot
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.True msgBanned
// assert it is not false-positive
let! isFalsePositive = fixture.IsMessageFalsePositive msgUpdate.Message
Expand All @@ -112,7 +112,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// check whether the message got auto-deleted by the bot
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.True msgBanned

// send a callback to mark it as false-positive
Expand All @@ -125,6 +125,104 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! isFalsePositive = fixture.IsMessageFalsePositive msgUpdate.Message
Assert.False isFalsePositive
}

[<Fact>]
let ``User will be autobanned after consecutive spam`` () = task {
    // One user keeps sending the same text to a monitored chat.
    // ChatsToMonitor[0] doesn't have stopwords, so only the ML path is involved
    // (presumably "66666666" is scored as spam by the test model - verify against the training set).
    let spammer = Tg.user()
    let spamUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "66666666", from = spammer)

    // messages 1..3 - no ban yet
    for _ in 1 .. 3 do
        let! _ = fixture.SendMessage spamUpdate
        let! bannedSoFar = fixture.MessageBanned spamUpdate.Message
        Assert.False bannedSoFar

    // message 4 - ban (depends on the ML_SPAM_AUTOBAN_SCORE_THRESHOLD)
    let! _ = fixture.SendMessage spamUpdate
    let! bannedAtLast = fixture.MessageBanned spamUpdate.Message
    Assert.True bannedAtLast
}

[<Fact>]
let ``User can recover from autoban by sending good messages`` () = task {
    // One user alternates between a spam-looking text and a harmless one.
    // ChatsToMonitor[0] doesn't have stopwords
    let sender = Tg.user()
    let spam = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "66666666", from = sender)
    let notSpam = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "b", from = sender)

    // Order of delivery: spam, good message, then three more spam messages.
    // None of them may end in a ban - not even the 4th spam message,
    // because the user posted 1 good message in between.
    for update in [ spam; notSpam; spam; spam; spam ] do
        let! _ = fixture.SendMessage update
        let! banned = fixture.MessageBanned update.Message
        Assert.False banned
}

[<Fact>]
let ``User can be saved from auto ban by vahter marking it false-positive`` () = task {
    // record a message, where 777777777777777777 is in a training set as spam word
    // ChatsToMonitor[0] doesn't have stopwords
    let sender = Tg.user()
    let spam = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "777777777777777777", from = sender)

    // 1st delivery: the message is auto-deleted, but the user is not banned
    let! _ = fixture.SendMessage spam
    let! bannedAfterFirst = fixture.MessageBanned spam.Message
    let! deletedAfterFirst = fixture.MessageIsAutoDeleted spam.Message
    Assert.True deletedAfterFirst
    Assert.False bannedAfterFirst

    // a vahter presses the "NotASpam" button, i.e. marks the message as false-positive
    let! callbackId = fixture.GetCallbackId spam.Message "NotASpam"
    let notSpamCallback = Tg.callback(string callbackId, from = fixture.Vahters[0])
    let! _ = fixture.SendMessage notSpamCallback

    // deliveries 2..4: still no ban, as vahter marked this text as false positive
    for _ in 1 .. 3 do
        let! _ = fixture.SendMessage spam
        let! banned = fixture.MessageBanned spam.Message
        Assert.False banned
}

interface IAssemblyFixture<VahterTestContainers>
interface IClassFixture<MlAwaitFixture>
45 changes: 41 additions & 4 deletions src/VahterBanBot/Bot.fs
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,29 @@ let killSpammerAutomated
logger.LogInformation logMsg
}

/// Auto-ban check for the author of `message`.
/// Loads the author's good/bad counters over their last
/// `botConfig.MlSpamAutobanCheckLastMsgCount` messages, derives a social score
/// (good minus bad) and calls `totalBan` when the score is less than or equal to
/// `botConfig.MlSpamAutobanScoreThreshold`. The score is also attached to the
/// "autoBan" activity as a tag.
let autoBan
    (botUser: DbUser)
    (botClient: ITelegramBotClient)
    (botConfig: BotConfiguration)
    (message: Message)
    (logger: ILogger) = task {
    use autoBanActivity = botActivity.StartActivity("autoBan")
    %autoBanActivity
        .SetTag("spammerId", message.From.Id)
        .SetTag("spammerUsername", message.From.Username)

    // social score = good messages minus bad messages within the inspected window
    let! stats = DB.getUserStatsByLastNMessages botConfig.MlSpamAutobanCheckLastMsgCount message.From.Id
    let score = stats.good - stats.bad
    %autoBanActivity.SetTag("socialScore", score)

    let thresholdReached = double score <= botConfig.MlSpamAutobanScoreThreshold
    if thresholdReached then
        // ban user in all monitored chats
        do! totalBan botClient botConfig message botUser logger
}

let justMessage
(botUser: DbUser)
(botClient: ITelegramBotClient)
(botConfig: BotConfiguration)
(logger: ILogger)
Expand All @@ -460,7 +482,16 @@ let justMessage
.SetTag("fromUserId", message.From.Id)
.SetTag("fromUsername", message.From.Username)

if botConfig.MlEnabled && message.Text <> null then
// check if user got auto-banned already
// that could happen due to the race condition between spammers mass messages
// and the bot's processing queue
let! isAutoBanned = DB.isBannedByVahter botUser.id message.From.Id
if isAutoBanned then
// just delete message and move on
do! botClient.DeleteMessageAsync(ChatId(message.Chat.Id), message.MessageId)
|> safeTaskAwait (fun e -> logger.LogError ($"Failed to delete message {message.MessageId} from chat {message.Chat.Id}", e))

elif botConfig.MlEnabled && message.Text <> null then
use mlActivity = botActivity.StartActivity("mlPrediction")

let shouldBeSkipped =
Expand All @@ -487,6 +518,10 @@ let justMessage
if prediction.Score >= botConfig.MlSpamThreshold then
// delete message
do! killSpammerAutomated botClient botConfig message logger botConfig.MlSpamDeletionEnabled prediction.Score

if botConfig.MlSpamAutobanEnabled then
// trigger auto-ban check
do! autoBan botUser botClient botConfig message logger
elif prediction.Score >= botConfig.MlWarningThreshold then
// just warn
do! killSpammerAutomated botClient botConfig message logger false prediction.Score
Expand All @@ -496,7 +531,7 @@ let justMessage
| None ->
// no prediction (error or not ready yet)
()

do!
message
|> DbMessage.newMessage
Expand Down Expand Up @@ -570,6 +605,7 @@ let adminCommand
}

let onMessage
(botUser: DbUser)
(botClient: ITelegramBotClient)
(botConfig: BotConfiguration)
(logger: ILogger)
Expand Down Expand Up @@ -606,7 +642,7 @@ let onMessage

// if the message is not a command from an authorized user, just save its ID to the DB
else
do! justMessage botClient botConfig logger ml message
do! justMessage botUser botClient botConfig logger ml message
}

let vahterMarkedAsNotSpam
Expand Down Expand Up @@ -698,6 +734,7 @@ let onCallback
}

let onUpdate
(botUser: DbUser)
(botClient: ITelegramBotClient)
(botConfig: BotConfiguration)
(logger: ILogger)
Expand All @@ -707,5 +744,5 @@ let onUpdate
if update.CallbackQuery <> null then
do! onCallback botClient botConfig logger update.CallbackQuery
else
do! onMessage botClient botConfig logger ml update.Message
do! onMessage botUser botClient botConfig logger ml update.Message
}
57 changes: 47 additions & 10 deletions src/VahterBanBot/DB.fs
Original file line number Diff line number Diff line change
Expand Up @@ -106,16 +106,6 @@ let getUserMessages (userId: int64): Task<DbMessage array> =
return Array.ofSeq messages
}

/// Removes the rows of the given messages from the `message` table and
/// yields whatever `ExecuteAsync` reports for the DELETE statement.
/// NOTE(review): rows are matched by `message_id` only, without `chat_id` -
/// confirm that message ids cannot collide across chats.
let deleteMsgs (msg: DbMessage[]): Task<int> =
    task {
        // collect the ids first, then open the connection
        let idsToDelete = msg |> Array.map (fun (m: DbMessage) -> m.message_id)
        use connection = new NpgsqlConnection(connString)

        //language=postgresql
        let deleteSql = "DELETE FROM message WHERE message_id = ANY(@msgIds)"
        let args = {| msgIds = idsToDelete |}
        return! connection.ExecuteAsync(deleteSql, args)
    }

let cleanupOldMessages (howOld: TimeSpan): Task<int> =
task {
use conn = new NpgsqlConnection(connString)
Expand Down Expand Up @@ -315,3 +305,50 @@ let countUniqueUserMsg (userId: int64): Task<int> =
let! result = conn.QuerySingleAsync<int>(sql, {| userId = userId |})
return result
}

/// Answers whether the `banned` table holds at least one row where
/// `banned_user_id` equals `userId` and `banned_by` equals `vahterId`.
let isBannedByVahter (vahterId: int64) (userId: int64): Task<bool> =
    task {
        use connection = new NpgsqlConnection(connString)

        //language=postgresql
        let existsSql = "SELECT EXISTS(SELECT 1 FROM banned WHERE banned_user_id = @userId AND banned_by = @vahterId)"
        let args = {| userId = userId; vahterId = vahterId |}

        // EXISTS always produces exactly one boolean row
        return! connection.QuerySingleAsync<bool>(existsSql, args)
    }

/// Counts "good" and "bad" messages among the most recent messages of a user.
///
/// `n`      - upper bound (SQL LIMIT) on the rows taken, newest first by `created_at`.
/// `userId` - the user whose rows in the `message` table are inspected.
///
/// A row counts as spam ("bad") when it has a match in `banned`, `banned_by_bot`
/// or `false_negative_messages`; a match by text in `false_positive_messages`
/// overrides all of that and makes the row "good".
/// The result is a single row with `good` and `bad` columns, read as `UserStats`.
let getUserStatsByLastNMessages (n: int) (userId: int64): Task<UserStats> =
task {
use conn = new NpgsqlConnection(connString)

// NOTE(review): LIMIT is applied after the LEFT JOINs, so a message matching
// several rows in a joined table would take more than one of the n slots -
// confirm the joined tables are unique per (message_id, chat) / text.
//language=postgresql
let sql =
"""
WITH stats AS (SELECT m.message_id,
m.chat_id,
b.id IS NOT NULL AS banned,
bbb.id IS NOT NULL AS banned_by_bot,
fnm.chat_id IS NOT NULL AS false_neg,
fpm.text IS NOT NULL AS false_pos
FROM message m
LEFT JOIN banned b ON m.message_id = b.message_id AND m.chat_id = b.banned_in_chat_id
LEFT JOIN public.banned_by_bot bbb
ON m.message_id = bbb.message_id AND m.chat_id = bbb.banned_in_chat_id
LEFT JOIN public.false_negative_messages fnm
ON m.message_id = fnm.message_id AND m.chat_id = fnm.chat_id
LEFT JOIN false_positive_messages fpm ON m.text = fpm.text
WHERE m.user_id = @userId
ORDER BY m.created_at DESC
LIMIT @n),
stats_count AS (SELECT message_id,
chat_id,
CASE WHEN false_pos THEN FALSE ELSE banned OR banned_by_bot OR false_neg END AS spam
FROM stats)
SELECT COUNT(*) FILTER (WHERE NOT spam) AS good,
COUNT(*) FILTER (WHERE spam) AS bad
FROM stats_count;
"""

// the aggregate query always yields exactly one row, hence QuerySingleAsync
let! result = conn.QuerySingleAsync<UserStats>(sql, {| userId = userId; n = n |})
return result
}
Loading