Skip to content

Commit

Permalink
Spam scoring+reporting (#10)
Browse files Browse the repository at this point in the history
* Spam scoring+reporting

* better emoji check

* Update src/VahterBanBot/Bot.fs

---------

Co-authored-by: Ayrat Hudaygulov <[email protected]>
  • Loading branch information
zawodskoj and Szer authored Jun 13, 2024
1 parent e8338db commit 29c424b
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 0 deletions.
102 changes: 102 additions & 0 deletions src/VahterBanBot/Antispam.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
module VahterBanBot.Antispam

open System
open System.Linq

/// Latin letters that are treated as look-alikes of Cyrillic letters.
let cyrillicLikeCharacters = [| 'u'; 't'; 'a' |]

/// Lowercase Russian alphabet used to decide whether a character is Cyrillic.
/// Includes 'й', which was previously missing and caused words containing it
/// to be under-counted as Cyrillic.
let cyrillicCharacters = "абвгдежзийклмнопрстуфхцчшщъыьэюяё".ToHashSet()

/// Counts the words in `wl` that look like "fake" Cyrillic: strictly more than
/// half (integer division) of the characters are Cyrillic, yet the word also
/// contains at least one of the Latin look-alike letters.
/// Words are expected to be lowercase already (both tables are lowercase).
let countFakeCyrillicWords (wl: string list) =
    let hasCyrillicLikeCharacters (w: string) =
        w.IndexOfAny(cyrillicLikeCharacters) <> -1

    let isMostlyCyrillic (w: string) =
        let cyrillicCount =
            w
            |> Seq.filter (fun c -> cyrillicCharacters.Contains c)
            |> Seq.length

        cyrillicCount > w.Length / 2

    wl
    |> List.filter (fun w -> isMostlyCyrillic w && hasCyrillicLikeCharacters w)
    |> List.length

/// Weighted spam phrases: (score, words that must appear consecutively).
let phrases = [
    10, [ "обучение"; "бесплатное" ]
    10, [ "бесплатное"; "обучение" ]
    7, [ "удаленная"; "работа" ]
    7, [ "удаленную"; "работу" ]
    3, [ "в"; "лс" ]
    3, [ "в"; "личку" ]
    3, [ "в"; "личные"; "сообщения" ]
]

/// Sums the scores of every non-overlapping occurrence of each entry of
/// `phrases` inside the word list `wl`.
///
/// The whole list is scanned: a word that does not start a phrase is skipped
/// and the search continues with the next word (previously the scan stopped at
/// the first non-matching word, so only phrases at the very beginning of the
/// message were ever found).
let countPhrases (wl: string list) =
    let rec countPhrase (remaining: string list) (totalScore: int) (phrase: int * string list) =
        match phrase with
        // An empty phrase can never match; score it as 0 instead of failing.
        | _, [] -> totalScore
        | score, head :: tail ->
            match remaining with
            | [] -> totalScore
            // List.truncate never throws when fewer than tail.Length words are left.
            | w :: ws when w = head && List.truncate tail.Length ws = tail ->
                // Consume the matched phrase so occurrences do not overlap.
                countPhrase (List.skip tail.Length ws) (totalScore + score) phrase
            | _ :: ws -> countPhrase ws totalScore phrase

    List.sumBy (countPhrase wl 0) phrases

/// Weighted spam word prefixes: (score, prefix a word must start with).
let wordPrefixesWeighted = [
    10, "крипт"
    10, "crypto"
    10, "defi"
    10, "usdt"
    10, "трейд"
    7, "вакансия"
    5, "партнер"
    5, "заработок"
    5, "заработк"
    3, "зарплата"
]

/// Total score of `wl` against `wordPrefixesWeighted`: every word that starts
/// with a listed prefix contributes that prefix's score (a word may match
/// several prefixes and then contributes for each of them).
let countWords (wl: string list) =
    let scoreForPrefix (weight: int, prefix: string) =
        let hits =
            wl
            |> List.filter (fun w -> w.StartsWith(prefix))
            |> List.length

        weight * hits

    wordPrefixesWeighted |> List.sumBy scoreForPrefix

/// Normalizes a message into a list of lowercase words.
///
/// Keeps only Latin letters a-z, Cyrillic letters а-я, digits and '$'; every
/// other non-whitespace character is removed (so "о-б-у-ч" becomes "обуч").
/// Any whitespace character separates words. Returns an empty list for a null,
/// empty or whitespace-only input.
let distillWords (str: string) =
    // regexs are probably better
    let isCyrLatAlphaChar c =
        let isLat = c >= 'a' && c <= 'z'
        let isCyr = c >= 'а' && c <= 'я' // 'ё' is outside this range and is dropped
        let isDigit = c >= '0' && c <= '9'
        let isDollar = c = '$' // useful

        isLat || isCyr || isDigit || isDollar

    if isNull str then
        []
    else
        let normalized =
            // Invariant lower-casing keeps the result independent of the server culture.
            str.ToLowerInvariant()
            // Treat newlines, tabs etc. as ordinary word separators.
            |> String.map (fun c -> if Char.IsWhiteSpace c then ' ' else c)
            // Separators must survive the filter, otherwise the whole message
            // collapses into a single word.
            |> String.filter (fun c -> c = ' ' || isCyrLatAlphaChar c)

        normalized.Split(' ', StringSplitOptions.TrimEntries ||| StringSplitOptions.RemoveEmptyEntries)
        |> List.ofArray

/// Counts "emoji-like" UTF-16 code units in `str`: every unit at or above
/// 0xDC00. That is one hit per supplementary-plane character (its low
/// surrogate) plus any BMP character from 0xE000 upwards.
///
/// The lower bound is the start of the low-surrogate range; the previous bound
/// of 0xDD00 skipped characters whose low surrogate is 0xDC00..0xDCFF
/// (for example U+1F4B0, the money bag). Returns 0 for a null or empty string.
let countEmojiLikeCharacters (str: string) =
    let firstEmojiLikeUnit = char 0xDC00

    if isNull str then
        0
    else
        str
        |> Seq.filter (fun c -> c >= firstEmojiLikeUnit)
        |> Seq.length

/// Computes the overall spam score of a message text as a weighted sum:
/// fake-Cyrillic words * 100 + emoji-like characters * 5
/// + phrase score * 10 + word-prefix score * 10.
///
/// `msg` may be null or empty (the caller passes a message's Text, which is
/// absent for non-text messages); such input scores 0 instead of throwing.
let calcSpamScore (msg: string) =
    if String.IsNullOrEmpty msg then
        0
    else
        let words = distillWords msg

        let fakeCyrillicScore = countFakeCyrillicWords words * 100
        let emojiScore = countEmojiLikeCharacters msg * 5
        let phraseScore = countPhrases words * 10
        let wordScore = countWords words * 10

        fakeCyrillicScore + emojiScore + phraseScore + wordScore
24 changes: 24 additions & 0 deletions src/VahterBanBot/Bot.fs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ open Telegram.Bot
open Telegram.Bot.Types
open VahterBanBot.Types
open VahterBanBot.Utils
open VahterBanBot.Antispam

let botActivity = new ActivitySource("VahterBanBot")

Expand Down Expand Up @@ -383,6 +384,24 @@ let unban
do! unbanUserTask.Ignore()
}

/// Reports a suspected spam message: tags a "warnSpamDetection" activity with
/// the sender, then writes the same text to the logs channel and to `logger`.
/// `score` is the spam score that triggered the report.
let warnSpamDetection
    (botClient: ITelegramBotClient)
    (botConfig: BotConfiguration)
    (message: Message)
    (logger: ILogger)
    score = task {
    use spamActivity = botActivity.StartActivity("warnSpamDetection")
    // The target is the author of the message itself. ReplyToMessage is only
    // set for replies, so reading the sender from it failed on ordinary messages.
    %spamActivity
        .SetTag("targetId", message.From.Id)
        .SetTag("targetUsername", message.From.Username)

    let logMsg = $"Detected spam (score: {score}) in {prependUsername message.Chat.Username} ({message.Chat.Id}) from {prependUsername message.From.Username} ({message.From.Id}) with text:\n{message.Text}"

    // log both to logger and to logs channel
    do! botClient.SendTextMessageAsync(ChatId(botConfig.LogsChannelId), logMsg) |> taskIgnore
    logger.LogInformation logMsg
}

let onUpdate
(botClient: ITelegramBotClient)
(botConfig: BotConfiguration)
Expand Down Expand Up @@ -470,6 +489,11 @@ let onUpdate

// if message is not a command from authorized user, just save it ID to DB
else
let spamScore = calcSpamScore message.Text

if spamScore >= 100 then
do! warnSpamDetection botClient botConfig message logger spamScore

use _ =
botActivity
.StartActivity("justMessage")
Expand Down
1 change: 1 addition & 0 deletions src/VahterBanBot/VahterBanBot.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
<Compile Include="Utils.fs" />
<Compile Include="Types.fs" />
<Compile Include="DB.fs" />
<Compile Include="Antispam.fs" />
<Compile Include="Bot.fs" />
<Compile Include="Cleanup.fs" />
<Compile Include="Program.fs" />
Expand Down

0 comments on commit 29c424b

Please sign in to comment.