diff --git a/src/VahterBanBot/ML.fs b/src/VahterBanBot/ML.fs index 873993a..23f549b 100644 --- a/src/VahterBanBot/ML.fs +++ b/src/VahterBanBot/ML.fs @@ -67,6 +67,8 @@ type MachineLearning( let trainDate = DateTime.UtcNow - botConf.MlTrainInterval let! rawData = DB.mlData botConf.MlTrainCriticalMsgCount trainDate + logger.LogInformation $"Training data count: {rawData.Length}" + let data = rawData |> Array.map (fun x -> @@ -94,19 +96,27 @@ type MachineLearning( featureColumnName = "Features", maximumNumberOfIterations = botConf.MlMaxNumberOfIterations )) + + logger.LogInformation "Fitting model..." let trainedModel = dataProcessPipeline.Fit(trainingData) + + logger.LogInformation "Evaluating model..." + predictionEngine <- Some(mlContext.Model.CreatePredictionEngine(trainedModel)) let predictions = trainedModel.Transform(testData) let metrics = mlContext.BinaryClassification.Evaluate(data = predictions, labelColumnName = "spam", scoreColumnName = "Score") + logger.LogInformation "Model transformation complete" + sw.Stop() let metricsStr = metricsToString metrics sw.Elapsed logger.LogInformation metricsStr do! telegramClient.SendTextMessageAsync(ChatId(botConf.LogsChannelId), metricsStr, parseMode = ParseMode.Markdown) |> taskIgnore + logger.LogInformation "Model trained" with ex -> logger.LogError(ex, "Error training model") } diff --git a/src/VahterBanBot/Program.fs b/src/VahterBanBot/Program.fs index ee72f0f..5615993 100644 --- a/src/VahterBanBot/Program.fs +++ b/src/VahterBanBot/Program.fs @@ -75,7 +75,7 @@ let botConf = MlTrainingSetFraction = getEnvOr "ML_TRAINING_SET_FRACTION" "0.2" |> float MlSpamThreshold = getEnvOr "ML_SPAM_THRESHOLD" "0.5" |> single MlWarningThreshold = getEnvOr "ML_WARNING_THRESHOLD" "0.0" |> single - MlMaxNumberOfIterations = getEnvOr "ML_MAX_NUMBER_OF_ITERATIONS" "100" |> int + MlMaxNumberOfIterations = getEnvOr "ML_MAX_NUMBER_OF_ITERATIONS" "50" |> int MlStopWordsInChats = getEnvOr "ML_STOP_WORDS_IN_CHATS" "{}" |> fromJson } let validateApiKey (ctx : HttpContext) =