From c44689623e9e3c72c8a4978a61feff349b48d68c Mon Sep 17 00:00:00 2001 From: Ayrat Hudaygulov Date: Sat, 20 Jul 2024 19:01:10 +0100 Subject: [PATCH 1/2] added ML training on start (might be cached in future?) and ability to autoban --- .env.example | 7 + src/VahterBanBot.Tests/ContainerTestBase.fs | 28 +- src/VahterBanBot.Tests/MLBanTests.fs | 70 +++ src/VahterBanBot.Tests/TgMessageUtils.fs | 2 +- .../VahterBanBot.Tests.fsproj | 4 + src/VahterBanBot.Tests/test_seed.sql | 448 ++++++++++++++++++ src/VahterBanBot/Bot.fs | 64 ++- src/VahterBanBot/DB.fs | 80 +++- src/VahterBanBot/ML.fs | 101 ++++ src/VahterBanBot/Program.fs | 18 +- src/VahterBanBot/Types.fs | 9 +- src/VahterBanBot/Utils.fs | 6 + src/VahterBanBot/VahterBanBot.fsproj | 2 + src/migrations/V8__ml-stuff.sql | 20 + 14 files changed, 832 insertions(+), 27 deletions(-) create mode 100644 src/VahterBanBot.Tests/MLBanTests.fs create mode 100644 src/VahterBanBot.Tests/test_seed.sql create mode 100644 src/VahterBanBot/ML.fs create mode 100644 src/migrations/V8__ml-stuff.sql diff --git a/.env.example b/.env.example index 84107c5..b061ce9 100644 --- a/.env.example +++ b/.env.example @@ -15,3 +15,10 @@ USE_FAKE_TG_API=false CLEANUP_OLD_MESSAGES=true CLEANUP_INTERVAL_SEC=86400 CLEANUP_OLD_LIMIT_SEC=259200 +ML_ENABLED=false +ML_SEED= +ML_SPAM_DELETION_ENABLED=false +ML_TRAIN_BEFORE_DATE=2021-01-01 +ML_TRAINING_SET_FRACTION=0.2 +ML_SPAM_THRESHOLD=0.5 +ML_STOP_WORDS_IN_CHATS={"-123":["word1","word2"]} diff --git a/src/VahterBanBot.Tests/ContainerTestBase.fs b/src/VahterBanBot.Tests/ContainerTestBase.fs index 776c35c..381cd72 100644 --- a/src/VahterBanBot.Tests/ContainerTestBase.fs +++ b/src/VahterBanBot.Tests/ContainerTestBase.fs @@ -90,6 +90,14 @@ type VahterTestContainers() = .WithEnvironment("USE_FAKE_TG_API", "true") .WithEnvironment("USE_POLLING", "false") .WithEnvironment("DATABASE_URL", internalConnectionString) + .WithEnvironment("CLEANUP_OLD_MESSAGES", "false") + .WithEnvironment("ML_ENABLED", "true") + // seed 
data uses 2021-01-01 as a date for all messages + .WithEnvironment("ML_TRAIN_BEFORE_DATE", "2021-01-02T00:00:00Z") + .WithEnvironment("ML_SEED", "42") + .WithEnvironment("ML_SPAM_DELETION_ENABLED", "true") + .WithEnvironment("ML_SPAM_THRESHOLD", "1.0") + .WithEnvironment("ML_STOP_WORDS_IN_CHATS", """{"-42":["2"]}""") // .net 8.0 upgrade has a breaking change // https://learn.microsoft.com/en-us/dotnet/core/compatibility/containers/8.0/aspnet-port // Azure default port for containers is 80, se we need explicitly set it @@ -124,10 +132,14 @@ type VahterTestContainers() = failwith out // seed some test data - // inserting the only admin users we have - // TODO might be a script in test assembly - let! _ = dbContainer.ExecAsync([|"""INSERT INTO "user"(id, username, banned_by, banned_at, ban_reason) VALUES (34, 'vahter_1', NULL, NULL, NULL), (69, 'vahter_2', NULL, NULL, NULL);"""|]) - + let script = File.ReadAllText(CommonDirectoryPath.GetCallerFileDirectory().DirectoryPath + "/test_seed.sql") + let scriptFilePath = String.Join("/", String.Empty, "tmp", Guid.NewGuid().ToString("D"), Path.GetRandomFileName()) + do! dbContainer.CopyAsync(Encoding.Default.GetBytes script, scriptFilePath, Unix.FileMode644) + let! scriptResult = dbContainer.ExecAsync [|"psql"; "--username"; "vahter_bot_ban_service"; "--dbname"; "vahter_bot_ban"; "--file"; scriptFilePath |] + + if scriptResult.Stderr <> "" then + failwith scriptResult.Stderr + // start the app container do! appContainer.StartAsync() @@ -185,3 +197,11 @@ type VahterTestContainers() = let! count = conn.QuerySingleAsync(sql, {| chatId = msg.Chat.Id; messageId = msg.MessageId |}) return count > 0 } + + member _.MessageIsAutoBanned(msg: Message) = task { + use conn = new NpgsqlConnection(publicConnectionString) + //language=postgresql + let sql = "SELECT COUNT(*) FROM banned_by_bot WHERE banned_in_chat_id = @chatId AND message_id = @messageId" + let! 
count = conn.QuerySingleAsync(sql, {| chatId = msg.Chat.Id; messageId = msg.MessageId |}) + return count > 0 + } diff --git a/src/VahterBanBot.Tests/MLBanTests.fs b/src/VahterBanBot.Tests/MLBanTests.fs new file mode 100644 index 0000000..8f427f2 --- /dev/null +++ b/src/VahterBanBot.Tests/MLBanTests.fs @@ -0,0 +1,70 @@ +module VahterBanBot.Tests.MLBanTests + +open System.Net +open System.Threading.Tasks +open VahterBanBot.Tests.ContainerTestBase +open VahterBanBot.Tests.TgMessageUtils +open Xunit +open Xunit.Extensions.AssemblyFixture + +type MLBanTests(fixture: VahterTestContainers) = + + [] + let ``Message IS autobanned if it looks like a spam`` () = task { + // we assume 5 seconds is enough for model to train. Could be flaky + do! Task.Delay 5000 + + // record a message, where 2 is in a training set as spam word + // ChatsToMonitor[0] doesn't have stopwords + let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "2") + let! _ = fixture.SendMessage msgUpdate + + // assert that the message got auto banned + let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + Assert.True msgBanned + } + + [] + let ``Message is NOT autobanned if it has a stopword in specific chat`` () = task { + // we assume 5 seconds is enough for model to train. Could be flaky + do! Task.Delay 5000 + + // record a message, where 2 is in a training set as spam word + // ChatsToMonitor[1] does have a stopword 2 + let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[1], text = "2") + let! _ = fixture.SendMessage msgUpdate + + // assert that the message did NOT get auto banned + let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + Assert.False msgBanned + } + + [] + let ``Message is NOT autobanned if it is a known false-positive spam`` () = task { + // we assume 5 seconds is enough for model to train. Could be flaky + do!
Task.Delay 5000 + + // record a message, where a is in the training set as a known false-positive spam word + let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "a") + let! _ = fixture.SendMessage msgUpdate + + // assert that the message did NOT get auto banned + let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + Assert.False msgBanned + } + + [] + let ``Message IS autobanned if it is a known false-negative spam`` () = task { + // we assume 5 seconds is enough for model to train. Could be flaky + do! Task.Delay 5000 + + // record a message, where 3 is in a training set as false negative spam word + let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "3") + let! _ = fixture.SendMessage msgUpdate + + // assert that the message got auto banned + let! msgBanned = fixture.MessageIsAutoBanned msgUpdate.Message + Assert.True msgBanned + } + + interface IAssemblyFixture diff --git a/src/VahterBanBot.Tests/TgMessageUtils.fs b/src/VahterBanBot.Tests/TgMessageUtils.fs index f09aa72..34f0484 100644 --- a/src/VahterBanBot.Tests/TgMessageUtils.fs +++ b/src/VahterBanBot.Tests/TgMessageUtils.fs @@ -5,7 +5,7 @@ open System.Threading open Telegram.Bot.Types type Tg() = - static let mutable i = 0L + static let mutable i = 10000L // higher than the data in the test_seed.sql static let nextInt64() = Interlocked.Increment &i static let next() = nextInt64() |> int static member user (?id: int64, ?username: string, ?firstName: string) = diff --git a/src/VahterBanBot.Tests/VahterBanBot.Tests.fsproj b/src/VahterBanBot.Tests/VahterBanBot.Tests.fsproj index 526b262..acb0158 100644 --- a/src/VahterBanBot.Tests/VahterBanBot.Tests.fsproj +++ b/src/VahterBanBot.Tests/VahterBanBot.Tests.fsproj @@ -9,10 +9,14 @@ + + PreserveNewest + + diff --git a/src/VahterBanBot.Tests/test_seed.sql b/src/VahterBanBot.Tests/test_seed.sql new file mode 100644 index 0000000..df1c932 --- /dev/null +++ b/src/VahterBanBot.Tests/test_seed.sql @@ -0,0 +1,448 @@ +INSERT INTO public."user"(id, username, 
banned_by, banned_at, ban_reason) +VALUES (34, 'vahter_1', NULL, NULL, NULL), + (69, 'vahter_2', NULL, NULL, NULL); + +-- insert some fake data for ML training +INSERT INTO public."user"(id, username, banned_by, banned_at, ban_reason) +VALUES (1, 'a', NULL, NULL, NULL), + (2, 'b', NULL, NULL, NULL), + (3, 'c', NULL, NULL, NULL), + (4, 'd', NULL, NULL, NULL), + (5, 'e', NULL, NULL, NULL), + (6, 'f', NULL, NULL, NULL), + (7, 'g', NULL, NULL, NULL), + (8, 'h', NULL, NULL, NULL), + (9, 'i', NULL, NULL, NULL), + (10, 'j', NULL, NULL, NULL); + +INSERT INTO public.message(chat_id, message_id, user_id, created_at, text, raw_message) +VALUES (-666, 1, 1, '2021-01-01 00:00:00', 'a', '{}'), -- false positive user banned + (-666, 2, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 3, 1, '2021-01-01 00:00:02', 'a', '{}'), + (-666, 4, 2, '2021-01-01 00:00:03', 'a', '{}'), + (-666, 5, 2, '2021-01-01 00:00:04', 'a', '{}'), + (-666, 6, 3, '2021-01-01 00:00:05', 'a', '{}'), + (-666, 7, 3, '2021-01-01 00:00:06', 'a', '{}'), + (-666, 8, 4, '2021-01-01 00:00:07', 'a', '{}'), -- false positive message banned + (-666, 9, 5, '2021-01-01 00:00:08', '1', '{}'), + (-666, 10, 5, '2021-01-01 00:00:09', '1', '{}'), + (-42, 1, 1, '2021-01-01 00:00:00', 'a', '{}'), + (-42, 2, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-42, 3, 1, '2021-01-01 00:00:02', 'a', '{}'), + (-42, 4, 2, '2021-01-01 00:00:03', 'a', '{}'), + (-42, 5, 2, '2021-01-01 00:00:04', 'a', '{}'), + (-42, 6, 3, '2021-01-01 00:00:05', 'a', '{}'), + (-42, 7, 3, '2021-01-01 00:00:06', 'a', '{}'), + (-42, 8, 4, '2021-01-01 00:00:07', '3', '{}'), -- false negative + (-42, 9, 6, '2021-01-01 00:00:08', '1', '{}'), + + -- to prevent small sample size, we'll copy the next line 100 times + -- this is spam + (-42, 10, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 11, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 12, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 13, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 14, 6, '2021-01-01 00:00:09', '1', 
'{}'), + (-42, 15, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 16, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 17, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 18, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 19, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 20, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 21, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 22, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 23, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 24, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 25, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 26, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 27, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 28, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 29, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 30, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 31, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 32, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 33, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 34, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 35, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 36, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 37, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 38, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 39, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 40, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 41, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 42, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 43, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 44, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 45, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 46, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 47, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 48, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 49, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 50, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 51, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 52, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 53, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 54, 6, '2021-01-01 00:00:09', '1', 
'{}'), + (-42, 55, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 56, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 57, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 58, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 59, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 60, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 61, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 62, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 63, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 64, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 65, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 66, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 67, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 68, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 69, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 70, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 71, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 72, 6, '2021-01-01 00:00:09', '1', '{}'), + (-42, 73, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 74, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 75, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 76, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 77, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 78, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 79, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 80, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 81, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 82, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 83, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 84, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 85, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 86, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 87, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 88, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 89, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 90, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 91, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 92, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 93, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 94, 6, '2021-01-01 00:00:09', '2', 
'{}'), + (-42, 95, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 96, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 97, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 98, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 99, 6, '2021-01-01 00:00:09', '2', '{}'), + -- this is not spam + (-666, 100, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 101, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 102, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 103, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 104, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 105, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 106, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 107, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 108, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 109, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 110, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 111, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 112, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 113, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 114, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 115, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 116, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 117, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 118, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 119, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 120, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 121, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 122, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 123, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 124, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 125, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 126, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 127, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 128, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 129, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 130, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 131, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 132, 1, '2021-01-01 00:00:01', 'a', '{}'), + 
(-666, 133, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 134, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 135, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 136, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 137, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 138, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 139, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 140, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 141, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 142, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 143, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 144, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 145, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 146, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 147, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 148, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 149, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 150, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 151, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 152, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 153, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 154, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 155, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 156, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 157, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 158, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 159, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 160, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 161, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 162, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 163, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 164, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 165, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 166, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 167, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 168, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 169, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 170, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 171, 1, 
'2021-01-01 00:00:01', 'a', '{}'), + (-666, 172, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 173, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 174, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 175, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 176, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 177, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 178, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 179, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 180, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 181, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 182, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 183, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 184, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 185, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 186, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 187, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 188, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 189, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 190, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 191, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 192, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 193, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 194, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 195, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 196, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 197, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 198, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 199, 1, '2021-01-01 00:00:01', 'a', '{}'), + + -- to enforce false-negative appearance + (-666, 200, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 201, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 202, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 203, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 204, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 205, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 206, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 207, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 208, 1, '2021-01-01 00:00:01', '3', 
'{}'), + (-666, 209, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 210, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 211, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 212, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 213, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 214, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 215, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 216, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 217, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 218, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 219, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 220, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 221, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 222, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 223, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 224, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 225, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 226, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 227, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 228, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 229, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 230, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 231, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 232, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 233, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 234, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 235, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 236, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 237, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 238, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 239, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 240, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 241, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 242, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 243, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 244, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 245, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 246, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 247, 1, 
'2021-01-01 00:00:01', '3', '{}'), + (-666, 248, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 249, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 250, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 251, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 252, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 253, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 254, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 255, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 256, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 257, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 258, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 259, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 260, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 261, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 262, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 263, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 264, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 265, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 266, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 267, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 268, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 269, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 270, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 271, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 272, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 273, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 274, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 275, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 276, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 277, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 278, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 279, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 280, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 281, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 282, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 283, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 284, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 285, 1, '2021-01-01 00:00:01', 
'3', '{}'), + (-666, 286, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 287, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 288, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 289, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 290, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 291, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 292, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 293, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 294, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 295, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 296, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 297, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 298, 1, '2021-01-01 00:00:01', '3', '{}'), + (-666, 299, 1, '2021-01-01 00:00:01', '3', '{}'); + +INSERT INTO public.banned(id, message_id, message_text, banned_user_id, banned_at, banned_in_chat_id, banned_in_chat_username, banned_by) +VALUES (1, 1, 'a', 1, '2021-01-01 00:00:00', -666, 'pro.hell', 34), + (2, 8, 'a', 4, '2021-01-01 00:00:07', -666, 'pro.hell', 69), + (3, 9, '1', 5, '2021-01-01 00:00:08', -666, 'pro.hell', 34), + (4, 10, '2', 6, '2021-01-01 00:00:09', -42, 'dotnetru', 69); + +INSERT INTO public.false_positive_users(user_id) +VALUES (1); + +INSERT INTO public.false_positive_messages(id) +VALUES (2); + +INSERT INTO public.false_negative_messages(chat_id, message_id) +VALUES (-42, 8), + (-666, 200), + (-666, 201), + (-666, 202), + (-666, 203), + (-666, 204), + (-666, 205), + (-666, 206), + (-666, 207), + (-666, 208), + (-666, 209), + (-666, 210), + (-666, 211), + (-666, 212), + (-666, 213), + (-666, 214), + (-666, 215), + (-666, 216), + (-666, 217), + (-666, 218), + (-666, 219), + (-666, 220), + (-666, 221), + (-666, 222), + (-666, 223), + (-666, 224), + (-666, 225), + (-666, 226), + (-666, 227), + (-666, 228), + (-666, 229), + (-666, 230), + (-666, 231), + (-666, 232), + (-666, 233), + (-666, 234), + (-666, 235), + (-666, 236), + (-666, 237), + (-666, 238), + (-666, 239), + (-666, 240), + (-666, 241), + (-666, 242), + 
(-666, 243), + (-666, 244), + (-666, 245), + (-666, 246), + (-666, 247), + (-666, 248), + (-666, 249), + (-666, 250), + (-666, 251), + (-666, 252), + (-666, 253), + (-666, 254), + (-666, 255), + (-666, 256), + (-666, 257), + (-666, 258), + (-666, 259), + (-666, 260), + (-666, 261), + (-666, 262), + (-666, 263), + (-666, 264), + (-666, 265), + (-666, 266), + (-666, 267), + (-666, 268), + (-666, 269), + (-666, 270), + (-666, 271), + (-666, 272), + (-666, 273), + (-666, 274), + (-666, 275), + (-666, 276), + (-666, 277), + (-666, 278), + (-666, 279), + (-666, 280), + (-666, 281), + (-666, 282), + (-666, 283), + (-666, 284), + (-666, 285), + (-666, 286), + (-666, 287), + (-666, 288), + (-666, 289), + (-666, 290), + (-666, 291), + (-666, 292), + (-666, 293), + (-666, 294), + (-666, 295), + (-666, 296), + (-666, 297), + (-666, 298), + (-666, 299); diff --git a/src/VahterBanBot/Bot.fs b/src/VahterBanBot/Bot.fs index 48614dc..98e6600 100644 --- a/src/VahterBanBot/Bot.fs +++ b/src/VahterBanBot/Bot.fs @@ -7,6 +7,7 @@ open System.Threading.Tasks open Microsoft.Extensions.Logging open Telegram.Bot open Telegram.Bot.Types +open VahterBanBot.ML open VahterBanBot.Types open VahterBanBot.Utils open VahterBanBot.Antispam @@ -377,19 +378,29 @@ let unban logger.LogInformation logMsg } -let warnSpamDetection +let killSpammerAutomated (botClient: ITelegramBotClient) (botConfig: BotConfiguration) (message: Message) (logger: ILogger) + (deleteMessage: bool) score = task { - use banOnReplyActivity = botActivity.StartActivity("warnSpamDetection") + use banOnReplyActivity = botActivity.StartActivity("killAutomated") %banOnReplyActivity .SetTag("spammerId", message.From.Id) .SetTag("spammerUsername", message.From.Username) + + if deleteMessage then + // delete message + do! 
botClient.DeleteMessageAsync(ChatId(message.Chat.Id), message.MessageId) + |> safeTaskAwait (fun e -> logger.LogError ($"Failed to delete message {message.MessageId} from chat {message.Chat.Id}", e)) + // 0 here is the bot itself + do! DbBanned.banMessage 0 message + |> DB.banUserByBot + + let msgType = if deleteMessage then "Deleted" else "Detected" + let logMsg = $"""{msgType} spam (score: {score}) in {prependUsername message.Chat.Username} ({message.Chat.Id}) from {prependUsername message.From.Username} ({message.From.Id}) with text:\n{message.Text}""" - let logMsg = $"Detected spam (score: {score}) in {prependUsername message.Chat.Username} ({message.Chat.Id}) from {prependUsername message.From.Username} ({message.From.Id}) with text:\n{message.Text}" - // log both to logger and to logs channel do! botClient.SendTextMessageAsync(ChatId(botConfig.LogsChannelId), logMsg) |> taskIgnore logger.LogInformation logMsg @@ -399,18 +410,48 @@ let justMessage (botClient: ITelegramBotClient) (botConfig: BotConfiguration) (logger: ILogger) + (ml: MachineLearning) (message: Message) = task { - let spamScore = if message.Text <> null then calcSpamScore message.Text else 0 - - if spamScore > 100 then - do! 
warnSpamDetection botClient botConfig message logger spamScore - use _ = + use justMessageActivity = botActivity .StartActivity("justMessage") .SetTag("fromUserId", message.From.Id) .SetTag("fromUsername", message.From.Username) - .SetTag("spamScore", spamScore) + + + if botConfig.MlEnabled && message.Text <> null then + use mlActivity = botActivity.StartActivity("mlPrediction") + + let shouldBeSkipped = + match botConfig.MlStopWordsInChats.TryGetValue message.Chat.Id with + | true, stopWords -> + stopWords + |> Seq.exists (fun sw -> message.Text.Contains(sw, StringComparison.OrdinalIgnoreCase)) + | _ -> false + %mlActivity.SetTag("skipPrediction", shouldBeSkipped) + + if not shouldBeSkipped then + match ml.Predict message.Text with + | Some prediction -> + %mlActivity.SetTag("spamScoreMl", prediction.Score) + + if prediction.Score >= botConfig.MlSpamThreshold then + // delete message + do! killSpammerAutomated botClient botConfig message logger botConfig.MlSpamDeletionEnabled prediction.Score + elif prediction.Score > 0.0f then + // just warn + do! killSpammerAutomated botClient botConfig message logger false prediction.Score + else + // not a spam + () + | None -> + // no prediction (error or not ready yet) + () + + let spamScore = if message.Text <> null then calcSpamScore message.Text else 0 + %justMessageActivity.SetTag("spamScore", spamScore) + do! message |> DbMessage.newMessage @@ -497,6 +538,7 @@ let onUpdate (botClient: ITelegramBotClient) (botConfig: BotConfiguration) (logger: ILogger) + (ml: MachineLearning) (message: Message) = task { use banOnReplyActivity = botActivity.StartActivity("onUpdate") @@ -530,5 +572,5 @@ let onUpdate // if message is not a command from authorized user, just save it ID to DB else - do! justMessage botClient botConfig logger message + do! 
justMessage botClient botConfig logger ml message } diff --git a/src/VahterBanBot/DB.fs b/src/VahterBanBot/DB.fs index 1125651..1b5c7f5 100644 --- a/src/VahterBanBot/DB.fs +++ b/src/VahterBanBot/DB.fs @@ -2,6 +2,7 @@ open System open System.Threading.Tasks +open Microsoft.ML.Data open Npgsql open VahterBanBot.Types open Dapper @@ -64,6 +65,21 @@ VALUES (@message_id, @message_text, @banned_user_id, @banned_at, @banned_in_chat return banned } +let banUserByBot (banned: DbBanned) : Task = + task { + use conn = new NpgsqlConnection(connString) + + //language=postgresql + let sql = + """ +INSERT INTO banned_by_bot (message_id, message_text, banned_user_id, banned_at, banned_in_chat_id, banned_in_chat_username) +VALUES (@message_id, @message_text, @banned_user_id, @banned_at, @banned_in_chat_id, @banned_in_chat_username) + """ + + let! _ = conn.ExecuteAsync(sql, banned) + return banned + } + let getUserMessages (userId: int64): Task = task { use conn = new NpgsqlConnection(connString) @@ -101,13 +117,18 @@ let getVahterStats(banInterval: TimeSpan option): Task = //language=postgresql let sql = """ -SELECT vahter.username AS vahter - , COUNT(*) AS killCountTotal - , COUNT(*) FILTER (WHERE b.banned_at > NOW() - @banInterval::INTERVAL) AS killCountInterval -FROM banned b - JOIN "user" vahter ON vahter.id = b.banned_by -GROUP BY b.banned_by, vahter.username -ORDER BY killCountTotal DESC +(SELECT vahter.username AS vahter + , COUNT(*) AS killCountTotal + , COUNT(*) FILTER (WHERE b.banned_at > NOW() - @banInterval::INTERVAL) AS killCountInterval + FROM banned b + JOIN "user" vahter ON vahter.id = b.banned_by + GROUP BY b.banned_by, vahter.username + UNION + SELECT 'bot' AS vahter + , COUNT(*) AS killCountTotal + , COUNT(*) FILTER (WHERE bbb.banned_at > NOW() - @banInterval::INTERVAL) AS killCountInterval + FROM banned_by_bot bbb) + ORDER BY killCountTotal DESC """ let! 
stats = conn.QueryAsync(sql, {| banInterval = banInterval |}) @@ -123,3 +144,48 @@ let getUserById (userId: int64): Task = let! users = conn.QueryAsync(sql, {| userId = userId |}) return users |> Seq.tryHead } + +type SpamOrHam = + { [] + text: string + [] + spam: bool } + +let mlData(criticalDate: DateTime) : Task = + task { + use conn = new NpgsqlConnection(connString) + + //language=postgresql + let sql = + """ +WITH really_banned AS (SELECT * + FROM banned b + -- known false positive spam messages + WHERE NOT EXISTS(SELECT 1 FROM false_positive_users fpu WHERE fpu.user_id = b.banned_user_id) + AND NOT EXISTS(SELECT 1 FROM false_positive_messages fpm WHERE fpm.id = b.id) + AND b.message_text IS NOT NULL + AND b.banned_at <= @criticalDate), + spam_or_ham AS (SELECT DISTINCT COALESCE(m.text, re_id.message_text) AS text, + CASE + -- known false negative spam messages + WHEN EXISTS(SELECT 1 + FROM false_negative_messages fnm + WHERE fnm.chat_id = m.chat_id + AND fnm.message_id = m.message_id) + THEN TRUE + WHEN re_id.banned_user_id IS NULL AND re_text.banned_user_id IS NULL + THEN FALSE + ELSE TRUE + END AS spam + FROM (SELECT * FROM message WHERE text IS NOT NULL AND created_at <= @criticalDate) m + FULL OUTER JOIN really_banned re_id + ON m.message_id = re_id.message_id AND m.chat_id = re_id.banned_in_chat_id + LEFT JOIN really_banned re_text ON m.text = re_text.message_text) +SELECT * +FROM spam_or_ham +ORDER BY RANDOM(); +""" + + let! 
data = conn.QueryAsync(sql, {| criticalDate = criticalDate |}) + return Array.ofSeq data + } diff --git a/src/VahterBanBot/ML.fs b/src/VahterBanBot/ML.fs new file mode 100644 index 0000000..95ff504 --- /dev/null +++ b/src/VahterBanBot/ML.fs @@ -0,0 +1,101 @@ +module VahterBanBot.ML + +open System +open System.Diagnostics +open System.Text +open System.Threading.Tasks +open Microsoft.Extensions.Hosting +open Microsoft.Extensions.Logging +open Microsoft.ML +open Microsoft.ML.Data +open Telegram.Bot +open Telegram.Bot.Types +open VahterBanBot.DB +open VahterBanBot.Types +open VahterBanBot.Utils + +[] +type Prediction = + { Score: single + text: string + spam: bool } + +type MachineLearning( + logger: ILogger, + telegramClient: ITelegramBotClient, + botConf: BotConfiguration +) = + let metricsToString(metrics: CalibratedBinaryClassificationMetrics) (duration: TimeSpan) = + let sb = StringBuilder() + %sb.AppendLine($"Model trained in {duration.TotalSeconds} seconds with following metrics:") + %sb.AppendLine($"Accuracy: {metrics.Accuracy}") + %sb.AppendLine($"AreaUnderPrecisionRecallCurve: {metrics.AreaUnderPrecisionRecallCurve}") + %sb.AppendLine($"ConfusionMatrix:\n```\n{metrics.ConfusionMatrix.GetFormattedConfusionTable()}\n```") + %sb.AppendLine($"Entropy:{metrics.Entropy}") + %sb.AppendLine($"F1Score:{metrics.F1Score}") + %sb.AppendLine($"LogLoss:{metrics.LogLoss}") + %sb.AppendLine($"LogLossReduction:{metrics.LogLossReduction}") + %sb.AppendLine($"NegativePrecision:{metrics.NegativePrecision}") + %sb.AppendLine($"NegativeRecall:{metrics.NegativeRecall}") + %sb.AppendLine($"PositivePrecision:{metrics.PositivePrecision}") + %sb.AppendLine($"PositiveRecall:{metrics.PositiveRecall}") + sb.ToString() + + let mutable predictionEngine: PredictionEngine option = None + + let trainModel() = task { + // switch to thread pool + do! Task.Yield() + + let sw = Stopwatch.StartNew() + + let mlContext = MLContext(botConf.MlSeed) + + let! 
data = DB.mlData botConf.MlTrainBeforeDate
+
+        let dataView = mlContext.Data.LoadFromEnumerable data
+        let trainTestSplit = mlContext.Data.TrainTestSplit(dataView, testFraction = botConf.MlTrainingSetFraction) // NOTE: despite its name, MlTrainingSetFraction is passed as the held-out *test* fraction
+        let trainingData = trainTestSplit.TrainSet
+        let testData = trainTestSplit.TestSet
+
+        let dataProcessPipeline = mlContext.Transforms.Text.FeaturizeText(outputColumnName = "Features", inputColumnName = "text")
+        let trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName = "spam", featureColumnName = "Features")
+        let trainingPipeline = dataProcessPipeline.Append(trainer)
+
+        let trainedModel = trainingPipeline.Fit(trainingData)
+        predictionEngine <- Some(mlContext.Model.CreatePredictionEngine(trainedModel)) // from here on Predict returns scores; evaluation below is reporting only
+
+        let predictions = trainedModel.Transform(testData)
+        let metrics = mlContext.BinaryClassification.Evaluate(data = predictions, labelColumnName = "spam", scoreColumnName = "Score")
+
+        sw.Stop()
+
+        let metricsStr = metricsToString metrics sw.Elapsed
+        logger.LogInformation metricsStr
+        do! telegramClient.SendTextMessageAsync(ChatId(botConf.LogsChannelId), metricsStr)
+            |> taskIgnore
+    }
+
+    member _.Predict(text: string) = // Some prediction, or None when no model is loaded or scoring throws
+        try
+            match predictionEngine with
+            | Some predictionEngine ->
+                // PredictionEngine is not thread-safe and one engine serves every update handler: score one message at a time
+                lock predictionEngine (fun () -> predictionEngine.Predict({ text = text; spam = false })) |> Some
+            | None ->
+                logger.LogInformation "Model not trained yet"
+                None
+        with ex ->
+            logger.LogError(ex, "Error predicting") // best effort: a failed prediction must not fail message handling
+            None
+
+    interface IHostedService with
+        member this.StartAsync _ = task {
+            if botConf.MlEnabled then
+                try
+                    do!
trainModel() + with ex -> + logger.LogError(ex, "Error training model") + } + + member this.StopAsync _ = Task.CompletedTask diff --git a/src/VahterBanBot/Program.fs b/src/VahterBanBot/Program.fs index f2ee6ae..5689be0 100644 --- a/src/VahterBanBot/Program.fs +++ b/src/VahterBanBot/Program.fs @@ -16,6 +16,7 @@ open Giraffe open Microsoft.Extensions.DependencyInjection open Telegram.Bot.Types.Enums open VahterBanBot.Cleanup +open VahterBanBot.ML open VahterBanBot.Utils open VahterBanBot.Bot open VahterBanBot.Types @@ -44,7 +45,14 @@ let botConf = UseFakeTgApi = getEnvOr "USE_FAKE_TG_API" "false" |> bool.Parse CleanupOldMessages = getEnvOr "CLEANUP_OLD_MESSAGES" "true" |> bool.Parse CleanupInterval = getEnvOr "CLEANUP_INTERVAL_SEC" "86400" |> int |> TimeSpan.FromSeconds - CleanupOldLimit = getEnvOr "CLEANUP_OLD_LIMIT_SEC" "259200" |> int |> TimeSpan.FromSeconds } + CleanupOldLimit = getEnvOr "CLEANUP_OLD_LIMIT_SEC" "259200" |> int |> TimeSpan.FromSeconds + MlEnabled = getEnvOr "ML_ENABLED" "false" |> bool.Parse + MlSeed = getEnvOrWith "ML_SEED" (Nullable()) (int >> Nullable) + MlSpamDeletionEnabled = getEnvOr "ML_SPAM_DELETION_ENABLED" "false" |> bool.Parse + MlTrainBeforeDate = getEnvOrWith "ML_TRAIN_BEFORE_DATE" DateTime.UtcNow (DateTimeOffset.Parse >> _.UtcDateTime) + MlTrainingSetFraction = getEnvOr "ML_TRAINING_SET_FRACTION" "0.2" |> float + MlSpamThreshold = getEnvOr "ML_SPAM_THRESHOLD" "0.5" |> single + MlStopWordsInChats = getEnvOr "ML_STOP_WORDS_IN_CHATS" "{}" |> JsonConvert.DeserializeObject<_> } let validateApiKey (ctx : HttpContext) = match ctx.TryGetRequestHeader "X-Telegram-Bot-Api-Secret-Token" with @@ -60,6 +68,8 @@ let builder = WebApplication.CreateBuilder() .AddGiraffe() .AddHostedService() .AddHostedService() + .AddSingleton() + .AddHostedService(fun sp -> sp.GetRequiredService()) .AddHttpClient("telegram_bot_client") .AddTypedClient(fun httpClient sp -> let options = TelegramBotClientOptions(botConf.BotToken) @@ -132,9 +142,10 @@ let webApp = 
choose [ use scope = ctx.RequestServices.CreateScope() let telegramClient = scope.ServiceProvider.GetRequiredService() + let ml = scope.ServiceProvider.GetRequiredService() let logger = ctx.GetLogger() try - do! onUpdate telegramClient botConf (ctx.GetLogger "VahterBanBot.Bot") update.Message + do! onUpdate telegramClient botConf (ctx.GetLogger "VahterBanBot.Bot") ml update.Message %topActivity.SetTag("update-error", false) with e -> logger.LogError(e, $"Unexpected error while processing update: {updateBodyJson}") @@ -160,7 +171,8 @@ if botConf.UsePolling then let ctx = app.Services.CreateScope() let logger = ctx.ServiceProvider.GetRequiredService>() let client = ctx.ServiceProvider.GetRequiredService() - do! onUpdate client botConf logger update.Message + let ml = ctx.ServiceProvider.GetRequiredService() + do! onUpdate client botConf logger ml update.Message } member x.HandlePollingErrorAsync (botClient: ITelegramBotClient, ex: Exception, cancellationToken: CancellationToken) = Task.CompletedTask diff --git a/src/VahterBanBot/Types.fs b/src/VahterBanBot/Types.fs index 1da2bff..e5f0099 100644 --- a/src/VahterBanBot/Types.fs +++ b/src/VahterBanBot/Types.fs @@ -20,7 +20,14 @@ type BotConfiguration = UsePolling: bool CleanupOldMessages: bool CleanupInterval: TimeSpan - CleanupOldLimit: TimeSpan } + CleanupOldLimit: TimeSpan + MlEnabled: bool + MlSeed: Nullable + MlSpamDeletionEnabled: bool + MlTrainBeforeDate: DateTime + MlTrainingSetFraction: float + MlSpamThreshold: single + MlStopWordsInChats: Dictionary } [] type DbUser = diff --git a/src/VahterBanBot/Utils.fs b/src/VahterBanBot/Utils.fs index 6213479..6178d4c 100644 --- a/src/VahterBanBot/Utils.fs +++ b/src/VahterBanBot/Utils.fs @@ -22,6 +22,12 @@ let getEnvWith name action = if value <> null then action value
+let getEnvOrWith name defaultValue action = // env var `name` parsed by `action`; `defaultValue` when it is unset or empty
+    // a variable that is set but empty (e.g. `ML_SEED=` in an env file) counts as unset, so parsers like `int` never receive ""
+    match Environment.GetEnvironmentVariable name with
+    | null | "" -> defaultValue
+    | value -> action value
+
 let prependUsername (s: string) =
     if isNull s
then null diff --git a/src/VahterBanBot/VahterBanBot.fsproj b/src/VahterBanBot/VahterBanBot.fsproj index 5054a1a..7075343 100644 --- a/src/VahterBanBot/VahterBanBot.fsproj +++ b/src/VahterBanBot/VahterBanBot.fsproj @@ -10,6 +10,7 @@ + @@ -35,6 +36,7 @@ + diff --git a/src/migrations/V8__ml-stuff.sql b/src/migrations/V8__ml-stuff.sql new file mode 100644 index 0000000..5ddbb47 --- /dev/null +++ b/src/migrations/V8__ml-stuff.sql @@ -0,0 +1,20 @@ +CREATE TABLE banned_by_bot +( + id BIGSERIAL PRIMARY KEY, + message_id INTEGER NULL, + message_text TEXT, + banned_user_id BIGINT NOT NULL + REFERENCES "user" (id), + banned_at TIMESTAMPTZ NOT NULL, + banned_in_chat_id BIGINT NULL, + banned_in_chat_username TEXT NULL +); + +CREATE INDEX banned_by_bot_banned_user_id_idx + ON banned_by_bot (banned_user_id); + +CREATE INDEX banned_by_bot_banned_in_chat_id_idx + ON banned_by_bot (banned_in_chat_id); + +CREATE INDEX banned_by_bot_message_id_idx + ON banned_by_bot (message_id); From 8d1029dfab7fd9d9f782b2a92b39f6df1d7109dc Mon Sep 17 00:00:00 2001 From: Ayrat Hudaygulov Date: Sat, 20 Jul 2024 19:49:00 +0100 Subject: [PATCH 2/2] fixed tests --- src/VahterBanBot.Tests/ContainerTestBase.fs | 8 + src/VahterBanBot.Tests/MLBanTests.fs | 15 +- src/VahterBanBot.Tests/TgMessageUtils.fs | 2 +- src/VahterBanBot.Tests/test_seed.sql | 852 ++++++++++---------- 4 files changed, 437 insertions(+), 440 deletions(-) diff --git a/src/VahterBanBot.Tests/ContainerTestBase.fs b/src/VahterBanBot.Tests/ContainerTestBase.fs index 381cd72..3f2413d 100644 --- a/src/VahterBanBot.Tests/ContainerTestBase.fs +++ b/src/VahterBanBot.Tests/ContainerTestBase.fs @@ -4,6 +4,7 @@ open System open System.IO open System.Net.Http open System.Text +open System.Threading.Tasks open DotNet.Testcontainers.Builders open DotNet.Testcontainers.Configurations open DotNet.Testcontainers.Containers @@ -205,3 +206,10 @@ type VahterTestContainers() = let! 
count = conn.QuerySingleAsync(sql, {| chatId = msg.Chat.Id; messageId = msg.MessageId |}) return count > 0 } + +// workaround to wait for ML to be ready +type MlAwaitFixture() = + interface IAsyncLifetime with + member this.DisposeAsync() = Task.CompletedTask + // we assume 5 seconds is enough for model to train. Could be flaky + member this.InitializeAsync() = Task.Delay 5000 diff --git a/src/VahterBanBot.Tests/MLBanTests.fs b/src/VahterBanBot.Tests/MLBanTests.fs index 8f427f2..6ae35ac 100644 --- a/src/VahterBanBot.Tests/MLBanTests.fs +++ b/src/VahterBanBot.Tests/MLBanTests.fs @@ -7,13 +7,10 @@ open VahterBanBot.Tests.TgMessageUtils open Xunit open Xunit.Extensions.AssemblyFixture -type MLBanTests(fixture: VahterTestContainers) = +type MLBanTests(fixture: VahterTestContainers, _unused: MlAwaitFixture) = [] let ``Message IS autobanned if it looks like a spam`` () = task { - // we assume 5 seconds is enough for model to train. Could be flaky - do! Task.Delay 5000 - // record a message, where 2 is in a training set as spam word // ChatsToMonitor[0] doesn't have stopwords let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "2") @@ -26,9 +23,6 @@ type MLBanTests(fixture: VahterTestContainers) = [] let ``Message is NOT autobanned if it has a stopword in specific chat`` () = task { - // we assume 5 seconds is enough for model to train. Could be flaky - do! Task.Delay 5000 - // record a message, where 2 is in a training set as spam word // ChatsToMonitor[1] does have a stopword 2 let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[1], text = "2") @@ -41,9 +35,6 @@ type MLBanTests(fixture: VahterTestContainers) = [] let ``Message is NOT autobanned if it is a known false-positive spam`` () = task { - // we assume 5 seconds is enough for model to train. Could be flaky - do! Task.Delay 5000 - // record a message, where 3 is in a training set as spam word let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "a") let! 
_ = fixture.SendMessage msgUpdate @@ -55,9 +46,6 @@ type MLBanTests(fixture: VahterTestContainers) = [] let ``Message IS autobanned if it is a known false-negative spam`` () = task { - // we assume 5 seconds is enough for model to train. Could be flaky - do! Task.Delay 5000 - // record a message, where 3 is in a training set as false negative spam word let msgUpdate = Tg.quickMsg(chat = fixture.ChatsToMonitor[0], text = "3") let! _ = fixture.SendMessage msgUpdate @@ -68,3 +56,4 @@ type MLBanTests(fixture: VahterTestContainers) = } interface IAssemblyFixture + interface IClassFixture diff --git a/src/VahterBanBot.Tests/TgMessageUtils.fs b/src/VahterBanBot.Tests/TgMessageUtils.fs index 34f0484..e9703c1 100644 --- a/src/VahterBanBot.Tests/TgMessageUtils.fs +++ b/src/VahterBanBot.Tests/TgMessageUtils.fs @@ -5,7 +5,7 @@ open System.Threading open Telegram.Bot.Types type Tg() = - static let mutable i = 10000L // higher than the data in the test_seed.sql + static let mutable i = 1L // higher than the data in the test_seed.sql static let nextInt64() = Interlocked.Increment &i static let next() = nextInt64() |> int static member user (?id: int64, ?username: string, ?firstName: string) = diff --git a/src/VahterBanBot.Tests/test_seed.sql b/src/VahterBanBot.Tests/test_seed.sql index df1c932..de9c6ca 100644 --- a/src/VahterBanBot.Tests/test_seed.sql +++ b/src/VahterBanBot.Tests/test_seed.sql @@ -4,445 +4,445 @@ VALUES (34, 'vahter_1', NULL, NULL, NULL), -- insert some fake data for ML training INSERT INTO public."user"(id, username, banned_by, banned_at, ban_reason) -VALUES (1, 'a', NULL, NULL, NULL), - (2, 'b', NULL, NULL, NULL), - (3, 'c', NULL, NULL, NULL), - (4, 'd', NULL, NULL, NULL), - (5, 'e', NULL, NULL, NULL), - (6, 'f', NULL, NULL, NULL), - (7, 'g', NULL, NULL, NULL), - (8, 'h', NULL, NULL, NULL), - (9, 'i', NULL, NULL, NULL), - (10, 'j', NULL, NULL, NULL); +VALUES (1001, 'a', NULL, NULL, NULL), + (1002, 'b', NULL, NULL, NULL), + (1003, 'c', NULL, NULL, NULL), + 
(1004, 'd', NULL, NULL, NULL), + (1005, 'e', NULL, NULL, NULL), + (1006, 'f', NULL, NULL, NULL), + (1007, 'g', NULL, NULL, NULL), + (1008, 'h', NULL, NULL, NULL), + (1009, 'i', NULL, NULL, NULL), + (1010, 'j', NULL, NULL, NULL); INSERT INTO public.message(chat_id, message_id, user_id, created_at, text, raw_message) -VALUES (-666, 1, 1, '2021-01-01 00:00:00', 'a', '{}'), -- false positive user banned - (-666, 2, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 3, 1, '2021-01-01 00:00:02', 'a', '{}'), - (-666, 4, 2, '2021-01-01 00:00:03', 'a', '{}'), - (-666, 5, 2, '2021-01-01 00:00:04', 'a', '{}'), - (-666, 6, 3, '2021-01-01 00:00:05', 'a', '{}'), - (-666, 7, 3, '2021-01-01 00:00:06', 'a', '{}'), - (-666, 8, 4, '2021-01-01 00:00:07', 'a', '{}'), -- false positive message banned - (-666, 9, 5, '2021-01-01 00:00:08', '1', '{}'), - (-666, 10, 5, '2021-01-01 00:00:09', '1', '{}'), - (-42, 1, 1, '2021-01-01 00:00:00', 'a', '{}'), - (-42, 2, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-42, 3, 1, '2021-01-01 00:00:02', 'a', '{}'), - (-42, 4, 2, '2021-01-01 00:00:03', 'a', '{}'), - (-42, 5, 2, '2021-01-01 00:00:04', 'a', '{}'), - (-42, 6, 3, '2021-01-01 00:00:05', 'a', '{}'), - (-42, 7, 3, '2021-01-01 00:00:06', 'a', '{}'), - (-42, 8, 4, '2021-01-01 00:00:07', '3', '{}'), -- false negative - (-42, 9, 6, '2021-01-01 00:00:08', '1', '{}'), +VALUES (-666, 10001, 1001, '2021-01-01 00:00:00', 'a', '{}'), -- false positive user banned + (-666, 10002, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10003, 1001, '2021-01-01 00:00:02', 'a', '{}'), + (-666, 10004, 1002, '2021-01-01 00:00:03', 'a', '{}'), + (-666, 10005, 1002, '2021-01-01 00:00:04', 'a', '{}'), + (-666, 10006, 1003, '2021-01-01 00:00:05', 'a', '{}'), + (-666, 10007, 1003, '2021-01-01 00:00:06', 'a', '{}'), + (-666, 10008, 1004, '2021-01-01 00:00:07', 'a', '{}'), -- false positive message banned + (-666, 10009, 1005, '2021-01-01 00:00:08', '1', '{}'), + (-666, 10010, 1005, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10001, 
1001, '2021-01-01 00:00:00', 'a', '{}'), + (-42, 10002, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-42, 10003, 1001, '2021-01-01 00:00:02', 'a', '{}'), + (-42, 10004, 1002, '2021-01-01 00:00:03', 'a', '{}'), + (-42, 10005, 1002, '2021-01-01 00:00:04', 'a', '{}'), + (-42, 10006, 1003, '2021-01-01 00:00:05', 'a', '{}'), + (-42, 10007, 1003, '2021-01-01 00:00:06', 'a', '{}'), + (-42, 10008, 1004, '2021-01-01 00:00:07', '3', '{}'), -- false negative + (-42, 10009, 1006, '2021-01-01 00:00:08', '1', '{}'), -- to prevent small sample size, we'll copy the next line 100 times -- this is spam - (-42, 10, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 11, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 12, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 13, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 14, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 15, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 16, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 17, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 18, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 19, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 20, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 21, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 22, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 23, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 24, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 25, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 26, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 27, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 28, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 29, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 30, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 31, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 32, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 33, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 34, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 35, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 36, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 37, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 
38, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 39, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 40, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 41, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 42, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 43, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 44, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 45, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 46, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 47, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 48, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 49, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 50, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 51, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 52, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 53, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 54, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 55, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 56, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 57, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 58, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 59, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 60, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 61, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 62, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 63, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 64, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 65, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 66, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 67, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 68, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 69, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 70, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 71, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 72, 6, '2021-01-01 00:00:09', '1', '{}'), - (-42, 73, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 74, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 75, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 76, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 77, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 
78, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 79, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 80, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 81, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 82, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 83, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 84, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 85, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 86, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 87, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 88, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 89, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 90, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 91, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 92, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 93, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 94, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 95, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 96, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 97, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 98, 6, '2021-01-01 00:00:09', '2', '{}'), - (-42, 99, 6, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10010, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10011, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10012, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10013, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10014, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10015, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10016, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10017, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10018, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10019, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10020, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10021, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10022, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10023, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10024, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10025, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 
10026, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10027, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10028, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10029, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10030, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10031, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10032, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10033, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10034, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10035, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10036, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10037, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10038, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10039, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10040, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10041, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10042, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10043, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10044, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10045, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10046, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10047, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10048, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10049, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10050, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10051, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10052, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10053, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10054, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10055, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10056, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10057, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10058, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10059, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10060, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10061, 1006, '2021-01-01 00:00:09', 
'1', '{}'), + (-42, 10062, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10063, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10064, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10065, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10066, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10067, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10068, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10069, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10070, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10071, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10072, 1006, '2021-01-01 00:00:09', '1', '{}'), + (-42, 10073, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10074, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10075, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10076, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10077, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10078, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10079, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10080, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10081, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10082, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10083, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10084, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10085, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10086, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10087, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10088, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10089, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10090, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10091, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10092, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10093, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10094, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10095, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10096, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10097, 1006, 
'2021-01-01 00:00:09', '2', '{}'), + (-42, 10098, 1006, '2021-01-01 00:00:09', '2', '{}'), + (-42, 10099, 1006, '2021-01-01 00:00:09', '2', '{}'), -- this is not spam - (-666, 100, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 101, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 102, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 103, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 104, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 105, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 106, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 107, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 108, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 109, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 110, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 111, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 112, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 113, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 114, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 115, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 116, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 117, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 118, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 119, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 120, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 121, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 122, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 123, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 124, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 125, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 126, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 127, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 128, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 129, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 130, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 131, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 132, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 133, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 134, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 
135, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 136, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 137, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 138, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 139, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 140, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 141, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 142, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 143, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 144, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 145, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 146, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 147, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 148, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 149, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 150, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 151, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 152, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 153, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 154, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 155, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 156, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 157, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 158, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 159, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 160, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 161, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 162, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 163, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 164, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 165, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 166, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 167, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 168, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 169, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 170, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 171, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 172, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 173, 1, '2021-01-01 
00:00:01', 'a', '{}'), - (-666, 174, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 175, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 176, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 177, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 178, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 179, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 180, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 181, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 182, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 183, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 184, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 185, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 186, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 187, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 188, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 189, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 190, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 191, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 192, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 193, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 194, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 195, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 196, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 197, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 198, 1, '2021-01-01 00:00:01', 'a', '{}'), - (-666, 199, 1, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10100, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10101, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10102, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10103, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10104, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10105, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10106, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10107, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10108, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10109, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10110, 1001, '2021-01-01 00:00:01', 'a', 
'{}'), + (-666, 10111, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10112, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10113, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10114, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10115, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10116, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10117, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10118, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10119, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10120, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10121, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10122, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10123, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10124, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10125, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10126, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10127, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10128, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10129, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10130, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10131, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10132, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10133, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10134, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10135, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10136, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10137, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10138, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10139, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10140, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10141, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10142, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10143, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10144, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10145, 1001, '2021-01-01 00:00:01', 'a', 
'{}'), + (-666, 10146, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10147, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10148, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10149, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10150, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10151, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10152, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10153, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10154, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10155, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10156, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10157, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10158, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10159, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10160, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10161, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10162, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10163, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10164, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10165, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10166, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10167, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10168, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10169, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10170, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10171, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10172, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10173, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10174, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10175, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10176, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10177, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10178, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10179, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10180, 1001, '2021-01-01 00:00:01', 'a', 
'{}'), + (-666, 10181, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10182, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10183, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10184, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10185, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10186, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10187, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10188, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10189, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10190, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10191, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10192, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10193, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10194, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10195, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10196, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10197, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10198, 1001, '2021-01-01 00:00:01', 'a', '{}'), + (-666, 10199, 1001, '2021-01-01 00:00:01', 'a', '{}'), -- to enforce false-negative appearance - (-666, 200, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 201, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 202, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 203, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 204, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 205, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 206, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 207, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 208, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 209, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 210, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 211, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 212, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 213, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 214, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 215, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 216, 1, '2021-01-01 
00:00:01', '3', '{}'), - (-666, 217, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 218, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 219, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 220, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 221, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 222, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 223, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 224, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 225, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 226, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 227, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 228, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 229, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 230, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 231, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 232, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 233, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 234, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 235, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 236, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 237, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 238, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 239, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 240, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 241, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 242, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 243, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 244, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 245, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 246, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 247, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 248, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 249, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 250, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 251, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 252, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 253, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 254, 1, '2021-01-01 00:00:01', '3', '{}'), 
- (-666, 255, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 256, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 257, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 258, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 259, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 260, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 261, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 262, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 263, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 264, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 265, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 266, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 267, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 268, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 269, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 270, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 271, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 272, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 273, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 274, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 275, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 276, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 277, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 278, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 279, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 280, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 281, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 282, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 283, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 284, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 285, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 286, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 287, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 288, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 289, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 290, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 291, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 292, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 293, 1, 
'2021-01-01 00:00:01', '3', '{}'), - (-666, 294, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 295, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 296, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 297, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 298, 1, '2021-01-01 00:00:01', '3', '{}'), - (-666, 299, 1, '2021-01-01 00:00:01', '3', '{}'); + (-666, 10200, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10201, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10202, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10203, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10204, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10205, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10206, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10207, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10208, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10209, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10210, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10211, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10212, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10213, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10214, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10215, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10216, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10217, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10218, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10219, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10220, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10221, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10222, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10223, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10224, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10225, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10226, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10227, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10228, 1001, '2021-01-01 00:00:01', '3', '{}'), 
+ (-666, 10229, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10230, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10231, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10232, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10233, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10234, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10235, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10236, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10237, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10238, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10239, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10240, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10241, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10242, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10243, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10244, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10245, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10246, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10247, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10248, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10249, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10250, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10251, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10252, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10253, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10254, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10255, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10256, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10257, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10258, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10259, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10260, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10261, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10262, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10263, 1001, '2021-01-01 00:00:01', '3', '{}'), + 
(-666, 10264, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10265, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10266, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10267, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10268, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10269, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10270, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10271, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10272, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10273, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10274, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10275, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10276, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10277, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10278, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10279, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10280, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10281, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10282, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10283, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10284, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10285, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10286, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10287, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10288, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10289, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10290, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10291, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10292, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10293, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10294, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10295, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10296, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10297, 1001, '2021-01-01 00:00:01', '3', '{}'), + (-666, 10298, 1001, '2021-01-01 00:00:01', '3', '{}'), + 
(-666, 10299, 1001, '2021-01-01 00:00:01', '3', '{}'); INSERT INTO public.banned(id, message_id, message_text, banned_user_id, banned_at, banned_in_chat_id, banned_in_chat_username, banned_by) -VALUES (1, 1, 'a', 1, '2021-01-01 00:00:00', -666, 'pro.hell', 34), - (2, 8, 'a', 4, '2021-01-01 00:00:07', -666, 'pro.hell', 69), - (3, 9, '1', 5, '2021-01-01 00:00:08', -666, 'pro.hell', 34), - (4, 10, '2', 6, '2021-01-01 00:00:09', -42, 'dotnetru', 69); +VALUES (100001, 10001, 'a', 1001, '2021-01-01 00:00:00', -666, 'pro.hell', 34), + (100002, 10008, 'a', 1004, '2021-01-01 00:00:07', -666, 'pro.hell', 69), + (100003, 10009, '1', 1005, '2021-01-01 00:00:08', -666, 'pro.hell', 34), + (100004, 10010, '2', 1006, '2021-01-01 00:00:09', -42, 'dotnetru', 69); INSERT INTO public.false_positive_users(user_id) -VALUES (1); +VALUES (1001); INSERT INTO public.false_positive_messages(id) -VALUES (2); +VALUES (100002); INSERT INTO public.false_negative_messages(chat_id, message_id) -VALUES (-42, 8), - (-666, 200), - (-666, 201), - (-666, 202), - (-666, 203), - (-666, 204), - (-666, 205), - (-666, 206), - (-666, 207), - (-666, 208), - (-666, 209), - (-666, 210), - (-666, 211), - (-666, 212), - (-666, 213), - (-666, 214), - (-666, 215), - (-666, 216), - (-666, 217), - (-666, 218), - (-666, 219), - (-666, 220), - (-666, 221), - (-666, 222), - (-666, 223), - (-666, 224), - (-666, 225), - (-666, 226), - (-666, 227), - (-666, 228), - (-666, 229), - (-666, 230), - (-666, 231), - (-666, 232), - (-666, 233), - (-666, 234), - (-666, 235), - (-666, 236), - (-666, 237), - (-666, 238), - (-666, 239), - (-666, 240), - (-666, 241), - (-666, 242), - (-666, 243), - (-666, 244), - (-666, 245), - (-666, 246), - (-666, 247), - (-666, 248), - (-666, 249), - (-666, 250), - (-666, 251), - (-666, 252), - (-666, 253), - (-666, 254), - (-666, 255), - (-666, 256), - (-666, 257), - (-666, 258), - (-666, 259), - (-666, 260), - (-666, 261), - (-666, 262), - (-666, 263), - (-666, 264), - (-666, 265), - (-666, 266), 
- (-666, 267), - (-666, 268), - (-666, 269), - (-666, 270), - (-666, 271), - (-666, 272), - (-666, 273), - (-666, 274), - (-666, 275), - (-666, 276), - (-666, 277), - (-666, 278), - (-666, 279), - (-666, 280), - (-666, 281), - (-666, 282), - (-666, 283), - (-666, 284), - (-666, 285), - (-666, 286), - (-666, 287), - (-666, 288), - (-666, 289), - (-666, 290), - (-666, 291), - (-666, 292), - (-666, 293), - (-666, 294), - (-666, 295), - (-666, 296), - (-666, 297), - (-666, 298), - (-666, 299); +VALUES (-42, 10008), + (-666, 10200), + (-666, 10201), + (-666, 10202), + (-666, 10203), + (-666, 10204), + (-666, 10205), + (-666, 10206), + (-666, 10207), + (-666, 10208), + (-666, 10209), + (-666, 10210), + (-666, 10211), + (-666, 10212), + (-666, 10213), + (-666, 10214), + (-666, 10215), + (-666, 10216), + (-666, 10217), + (-666, 10218), + (-666, 10219), + (-666, 10220), + (-666, 10221), + (-666, 10222), + (-666, 10223), + (-666, 10224), + (-666, 10225), + (-666, 10226), + (-666, 10227), + (-666, 10228), + (-666, 10229), + (-666, 10230), + (-666, 10231), + (-666, 10232), + (-666, 10233), + (-666, 10234), + (-666, 10235), + (-666, 10236), + (-666, 10237), + (-666, 10238), + (-666, 10239), + (-666, 10240), + (-666, 10241), + (-666, 10242), + (-666, 10243), + (-666, 10244), + (-666, 10245), + (-666, 10246), + (-666, 10247), + (-666, 10248), + (-666, 10249), + (-666, 10250), + (-666, 10251), + (-666, 10252), + (-666, 10253), + (-666, 10254), + (-666, 10255), + (-666, 10256), + (-666, 10257), + (-666, 10258), + (-666, 10259), + (-666, 10260), + (-666, 10261), + (-666, 10262), + (-666, 10263), + (-666, 10264), + (-666, 10265), + (-666, 10266), + (-666, 10267), + (-666, 10268), + (-666, 10269), + (-666, 10270), + (-666, 10271), + (-666, 10272), + (-666, 10273), + (-666, 10274), + (-666, 10275), + (-666, 10276), + (-666, 10277), + (-666, 10278), + (-666, 10279), + (-666, 10280), + (-666, 10281), + (-666, 10282), + (-666, 10283), + (-666, 10284), + (-666, 10285), + (-666, 10286), + 
(-666, 10287), + (-666, 10288), + (-666, 10289), + (-666, 10290), + (-666, 10291), + (-666, 10292), + (-666, 10293), + (-666, 10294), + (-666, 10295), + (-666, 10296), + (-666, 10297), + (-666, 10298), + (-666, 10299);