Skip to content

Commit

Permalink
added real auto-ban with social score system (#47)
Browse files Browse the repository at this point in the history
  • Loading branch information
Szer authored Aug 7, 2024
1 parent 45e56e9 commit 2983943
Show file tree
Hide file tree
Showing 7 changed files with 229 additions and 26 deletions.
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
BOT_TELEGRAM_TOKEN=SECRET_FROM_TELEGRAM
BOT_AUTH_TOKEN=JUST_YOUR_SECRET
BOT_HOOK_ROUTE=/bot
BOT_USER_ID=123456789
BOT_USER_NAME=your_bot
ASPNETCORE_URLS=http://+:88
DEBUG=true
LOGS_CHANNEL_ID=-1000000000000
Expand All @@ -19,6 +21,9 @@ ML_ENABLED=false
ML_RETRAIN_INTERVAL_SEC=86400
ML_SEED=
ML_SPAM_DELETION_ENABLED=false
ML_SPAM_AUTOBAN_ENABLED=true
ML_SPAM_AUTOBAN_SCORE_THRESHOLD=-5.0
ML_SPAM_AUTOBAN_CHECK_LAST_MSG_COUNT=10
ML_TRAIN_INTERVAL_DAYS=30
ML_TRAIN_CRITICAL_MSG_COUNT=5
ML_TRAINING_SET_FRACTION=0.2
Expand Down
7 changes: 6 additions & 1 deletion src/VahterBanBot.Tests/ContainerTestBase.fs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ type VahterTestContainers() =
.WithImage(image)
.WithNetwork(network)
.WithPortBinding(80, true)
.WithEnvironment("BOT_USER_ID", "1337")
.WithEnvironment("BOT_USER_NAME", "test_bot")
.WithEnvironment("BOT_TELEGRAM_TOKEN", "TELEGRAM_SECRET")
.WithEnvironment("BOT_AUTH_TOKEN", "OUR_SECRET")
.WithEnvironment("LOGS_CHANNEL_ID", "-123")
Expand All @@ -97,6 +99,9 @@ type VahterTestContainers() =
.WithEnvironment("ML_SPAM_DELETION_ENABLED", "true")
.WithEnvironment("ML_SPAM_THRESHOLD", "1.0")
.WithEnvironment("ML_STOP_WORDS_IN_CHATS", """{"-42":["2"]}""")
.WithEnvironment("ML_SPAM_AUTOBAN_ENABLED", "true")
.WithEnvironment("ML_SPAM_AUTOBAN_CHECK_LAST_MSG_COUNT", "10")
.WithEnvironment("ML_SPAM_AUTOBAN_SCORE_THRESHOLD", "-4.0")
// .net 8.0 upgrade has a breaking change
// https://learn.microsoft.com/en-us/dotnet/core/compatibility/containers/8.0/aspnet-port
// Azure's default port for containers is 80, so we need to set it explicitly
Expand Down Expand Up @@ -203,7 +208,7 @@ type VahterTestContainers() =
return count > 0
}

member _.MessageIsAutoBanned(msg: Message) = task {
member _.MessageIsAutoDeleted(msg: Message) = task {
use conn = new NpgsqlConnection(publicConnectionString)
//language=postgresql
let sql = "SELECT COUNT(*) FROM banned_by_bot WHERE banned_in_chat_id = @chatId AND message_id = @messageId"
Expand Down
114 changes: 106 additions & 8 deletions src/VahterBanBot.Tests/MLBanTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// assert that the message got auto banned
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.True msgBanned
}

Expand All @@ -29,7 +29,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// assert that the message got auto banned
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.False msgBanned
}

Expand All @@ -42,7 +42,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// assert that the message got auto banned
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.False msgBanned
}

Expand All @@ -54,7 +54,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// assert that the message got auto banned
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.False msgBanned
}

Expand All @@ -65,7 +65,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// assert that the message got auto banned
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.False msgBanned
}

Expand All @@ -76,7 +76,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// assert that the message got auto banned
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.True msgBanned
}

Expand All @@ -88,7 +88,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// assert that the message got auto banned
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.True msgBanned
// assert it is not false-positive
let! isFalsePositive = fixture.IsMessageFalsePositive msgUpdate.Message
Expand All @@ -112,7 +112,7 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! _ = fixture.SendMessage msgUpdate

// assert that the message got auto banned
let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message
let! msgBanned = fixture.MessageIsAutoDeleted msgUpdate.Message
Assert.True msgBanned

// send a callback to mark it as false-positive
Expand All @@ -125,6 +125,104 @@ type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) =
let! isFalsePositive = fixture.IsMessageFalsePositive msgUpdate.Message
Assert.False isFalsePositive
}

[<Fact>]
// Verifies that a user who keeps posting spam is banned once enough spam has accumulated.
// The very same update (same user, same text) is sent four times in a row and the
// ban state is checked after every send.
let ``User will be autobanned after consecutive spam`` () = task {
// the message text is "66666666"
// presumably this text is in the training set as spam — TODO confirm against the ML fixture
// ChatsToMonitor[0] doesn't have stopwords
let user = Tg.user()
let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "66666666", from = user)

// 1 - no ban
let! _ = fixture.SendMessage msgUpdate
let! msgBanned = fixture.MessageBanned msgUpdate.Message
Assert.False msgBanned

// 2 - no ban
let! _ = fixture.SendMessage msgUpdate
let! msgBanned = fixture.MessageBanned msgUpdate.Message
Assert.False msgBanned

// 3 - no ban
let! _ = fixture.SendMessage msgUpdate
let! msgBanned = fixture.MessageBanned msgUpdate.Message
Assert.False msgBanned

// 4 - ban (depends on ML_SPAM_AUTOBAN_SCORE_THRESHOLD, which the test container sets to -4.0)
let! _ = fixture.SendMessage msgUpdate
let! msgBanned = fixture.MessageBanned msgUpdate.Message
Assert.True msgBanned
}

[<Fact>]
// Verifies that a single good message between spam messages keeps the user from being
// banned after the fourth spam message: none of the five sends results in a ban.
let ``User can recover from autoban by sending good messages`` () = task {
// `spam` carries the text "66666666", `notSpam` carries the text "b"
// presumably "66666666" is in the training set as spam and "b" is not — TODO confirm against the ML fixture
// ChatsToMonitor[0] doesn't have stopwords
let user = Tg.user()
let spam = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "66666666", from = user)
let notSpam = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "b", from = user)

// 1 - no ban
let! _ = fixture.SendMessage spam
let! msgBanned = fixture.MessageBanned spam.Message
Assert.False msgBanned

// 1.5 - no ban (the good message)
let! _ = fixture.SendMessage notSpam
let! msgBanned = fixture.MessageBanned notSpam.Message
Assert.False msgBanned

// 2 - no ban
let! _ = fixture.SendMessage spam
let! msgBanned = fixture.MessageBanned spam.Message
Assert.False msgBanned

// 3 - no ban
let! _ = fixture.SendMessage spam
let! msgBanned = fixture.MessageBanned spam.Message
Assert.False msgBanned

// 4 - no ban (as the user posted 1 good message in between)
let! _ = fixture.SendMessage spam
let! msgBanned = fixture.MessageBanned spam.Message
Assert.False msgBanned
}

[<Fact>]
// Verifies that once a vahter marks a deleted message as "not a spam", further messages
// with the same text do not lead to a ban of the user.
let ``User can be saved from auto ban by vahter marking it false-positive`` () = task {
// record a message, where 777777777777777777 is in a training set as spam word
// ChatsToMonitor[0] doesn't have stopwords
let user = Tg.user()
let spam = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "777777777777777777", from = user)

// 1 - the message is auto-deleted, but the user is not banned
let! _ = fixture.SendMessage spam
let! msgBanned = fixture.MessageBanned spam.Message
let! msgDeleted = fixture.MessageIsAutoDeleted spam.Message
Assert.True msgDeleted
Assert.False msgBanned

// 1.5 - vahter marked as false-positive via button
// send a callback to mark it as false-positive
let! callbackId = fixture.GetCallbackId spam.Message "NotASpam"
let msgCallback = Tg.callback(string callbackId, from = fixture.Vahters[0])
let! _ = fixture.SendMessage msgCallback

// 2 - no ban
let! _ = fixture.SendMessage spam
let! msgBanned = fixture.MessageBanned spam.Message
Assert.False msgBanned

// 3 - no ban
let! _ = fixture.SendMessage spam
let! msgBanned = fixture.MessageBanned spam.Message
Assert.False msgBanned

// 4 - no ban (as vahter marked this as false positive)
let! _ = fixture.SendMessage spam
let! msgBanned = fixture.MessageBanned spam.Message
Assert.False msgBanned
}

interface IAssemblyFixture<VahterTestContainers>
interface IClassFixture<MlAwaitFixture>
45 changes: 41 additions & 4 deletions src/VahterBanBot/Bot.fs
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,29 @@ let killSpammerAutomated
logger.LogInformation logMsg
}

/// Checks the sender's "social score" and bans them everywhere when it is too low.
///
/// The score is `good - bad`, computed over the sender's last
/// `botConfig.MlSpamAutobanCheckLastMsgCount` messages as reported by
/// `DB.getUserStatsByLastNMessages`. When the score is less than or equal to
/// `botConfig.MlSpamAutobanScoreThreshold`, `totalBan` is invoked on behalf of `botUser`.
/// The sender id, username and computed score are attached to the "autoBan" activity as tags.
let autoBan
    (botUser: DbUser)
    (botClient: ITelegramBotClient)
    (botConfig: BotConfiguration)
    (message: Message)
    (logger: ILogger) = task {
    use autoBanActivity = botActivity.StartActivity("autoBan")
    let sender = message.From
    %autoBanActivity
        .SetTag("spammerId", sender.Id)
        .SetTag("spammerUsername", sender.Username)

    // good/bad counters over the most recent messages of this user
    let! stats =
        sender.Id
        |> DB.getUserStatsByLastNMessages botConfig.MlSpamAutobanCheckLastMsgCount
    let socialScore = stats.good - stats.bad
    %autoBanActivity.SetTag("socialScore", socialScore)

    let scoreTooLow = double socialScore <= botConfig.MlSpamAutobanScoreThreshold
    if scoreTooLow then
        // ban user in all monitored chats
        do! totalBan botClient botConfig message botUser logger
}

let justMessage
(botUser: DbUser)
(botClient: ITelegramBotClient)
(botConfig: BotConfiguration)
(logger: ILogger)
Expand All @@ -460,7 +482,16 @@ let justMessage
.SetTag("fromUserId", message.From.Id)
.SetTag("fromUsername", message.From.Username)

if botConfig.MlEnabled && message.Text <> null then
// check if user got auto-banned already
// that could happen due to the race condition between spammers mass messages
// and the bot's processing queue
let! isAutoBanned = DB.isBannedByVahter botUser.id message.From.Id
if isAutoBanned then
// just delete message and move on
do! botClient.DeleteMessageAsync(ChatId(message.Chat.Id), message.MessageId)
|> safeTaskAwait (fun e -> logger.LogError ($"Failed to delete message {message.MessageId} from chat {message.Chat.Id}", e))

elif botConfig.MlEnabled && message.Text <> null then
use mlActivity = botActivity.StartActivity("mlPrediction")

let shouldBeSkipped =
Expand All @@ -487,6 +518,10 @@ let justMessage
if prediction.Score >= botConfig.MlSpamThreshold then
// delete message
do! killSpammerAutomated botClient botConfig message logger botConfig.MlSpamDeletionEnabled prediction.Score

if botConfig.MlSpamAutobanEnabled then
// trigger auto-ban check
do! autoBan botUser botClient botConfig message logger
elif prediction.Score >= botConfig.MlWarningThreshold then
// just warn
do! killSpammerAutomated botClient botConfig message logger false prediction.Score
Expand All @@ -496,7 +531,7 @@ let justMessage
| None ->
// no prediction (error or not ready yet)
()

do!
message
|> DbMessage.newMessage
Expand Down Expand Up @@ -570,6 +605,7 @@ let adminCommand
}

let onMessage
(botUser: DbUser)
(botClient: ITelegramBotClient)
(botConfig: BotConfiguration)
(logger: ILogger)
Expand Down Expand Up @@ -606,7 +642,7 @@ let onMessage

// if the message is not a command from an authorized user, just save its ID to the DB
else
do! justMessage botClient botConfig logger ml message
do! justMessage botUser botClient botConfig logger ml message
}

let vahterMarkedAsNotSpam
Expand Down Expand Up @@ -698,6 +734,7 @@ let onCallback
}

let onUpdate
(botUser: DbUser)
(botClient: ITelegramBotClient)
(botConfig: BotConfiguration)
(logger: ILogger)
Expand All @@ -707,5 +744,5 @@ let onUpdate
if update.CallbackQuery <> null then
do! onCallback botClient botConfig logger update.CallbackQuery
else
do! onMessage botClient botConfig logger ml update.Message
do! onMessage botUser botClient botConfig logger ml update.Message
}
57 changes: 47 additions & 10 deletions src/VahterBanBot/DB.fs
Original file line number Diff line number Diff line change
Expand Up @@ -106,16 +106,6 @@ let getUserMessages (userId: int64): Task<DbMessage array> =
return Array.ofSeq messages
}

/// Deletes the rows of the `message` table whose `message_id` matches any of the given messages.
/// Returns the value produced by `ExecuteAsync` for the DELETE statement.
/// NOTE(review): rows are matched on `message_id` alone, without `chat_id` —
/// confirm that message ids cannot collide between chats.
let deleteMsgs (msg: DbMessage[]): Task<int> =
    task {
        //language=postgresql
        let sql = "DELETE FROM message WHERE message_id = ANY(@msgIds)"
        let args = {| msgIds = msg |> Array.map (fun m -> m.message_id) |}

        use conn = new NpgsqlConnection(connString)
        let! deletedCount = conn.ExecuteAsync(sql, args)
        return deletedCount
    }

let cleanupOldMessages (howOld: TimeSpan): Task<int> =
task {
use conn = new NpgsqlConnection(connString)
Expand Down Expand Up @@ -315,3 +305,50 @@ let countUniqueUserMsg (userId: int64): Task<int> =
let! result = conn.QuerySingleAsync<int>(sql, {| userId = userId |})
return result
}

/// Tells whether the `banned` table holds at least one row where `userId` is the banned user
/// and `vahterId` is the one who issued the ban.
let isBannedByVahter (vahterId: int64) (userId: int64): Task<bool> =
    task {
        //language=postgresql
        let sql = "SELECT EXISTS(SELECT 1 FROM banned WHERE banned_user_id = @userId AND banned_by = @vahterId)"
        let args = {| userId = userId; vahterId = vahterId |}

        use conn = new NpgsqlConnection(connString)
        return! conn.QuerySingleAsync<bool>(sql, args)
    }

/// Counts how many of the user's most recent messages are "good" and how many are "bad" (spam).
///
/// The query takes up to `n` rows for `userId` from `message`, newest first (by `created_at`),
/// and flags each row via LEFT JOINs:
///   - `banned`        — a row in `banned` exists for the same message id and chat
///   - `banned_by_bot` — a row in `banned_by_bot` exists for the same message id and chat
///   - `false_neg`     — a row in `false_negative_messages` exists for the same message id and chat
///   - `false_pos`     — a row in `false_positive_messages` exists with the same text
/// A row counts as spam when it is banned, banned by the bot, or a false negative,
/// unless its text is a known false positive, in which case it is never spam.
///
/// Returns a `UserStats` with `good` (non-spam count) and `bad` (spam count).
/// NOTE(review): LIMIT is applied to the joined rows, so if any join can match more than
/// one row per message, fewer than `n` distinct messages are inspected — confirm the
/// joined tables are unique on the join keys.
let getUserStatsByLastNMessages (n: int) (userId: int64): Task<UserStats> =
task {
use conn = new NpgsqlConnection(connString)

//language=postgresql
let sql =
"""
WITH stats AS (SELECT m.message_id,
m.chat_id,
b.id IS NOT NULL AS banned,
bbb.id IS NOT NULL AS banned_by_bot,
fnm.chat_id IS NOT NULL AS false_neg,
fpm.text IS NOT NULL AS false_pos
FROM message m
LEFT JOIN banned b ON m.message_id = b.message_id AND m.chat_id = b.banned_in_chat_id
LEFT JOIN public.banned_by_bot bbb
ON m.message_id = bbb.message_id AND m.chat_id = bbb.banned_in_chat_id
LEFT JOIN public.false_negative_messages fnm
ON m.message_id = fnm.message_id AND m.chat_id = fnm.chat_id
LEFT JOIN false_positive_messages fpm ON m.text = fpm.text
WHERE m.user_id = @userId
ORDER BY m.created_at DESC
LIMIT @n),
stats_count AS (SELECT message_id,
chat_id,
CASE WHEN false_pos THEN FALSE ELSE banned OR banned_by_bot OR false_neg END AS spam
FROM stats)
SELECT COUNT(*) FILTER (WHERE NOT spam) AS good,
COUNT(*) FILTER (WHERE spam) AS bad
FROM stats_count;
"""

// the aggregate always yields exactly one row (good, bad), read here as UserStats
let! result = conn.QuerySingleAsync<UserStats>(sql, {| userId = userId; n = n |})
return result
}
Loading

0 comments on commit 2983943

Please sign in to comment.