From e6ff23fd7eb27a2bd5a57989175867127f96d664 Mon Sep 17 00:00:00 2001
From: MrPowerGamerBR
Date: Tue, 16 Jul 2024 18:46:44 -0300
Subject: [PATCH] Naive Bayes

---
Review notes (text in this section is ignored by `git am`):
- SparklyNaiveBayes.setup() now trains through the normalizing train() helper,
  so training phrases and incoming messages are cleaned up the same way.
- NaiveBayes smooths with the vocabulary size instead of the number of labels,
  lowercases consistently and ignores empty tokens.
- SparklyNaiveBayesResponse no longer throws when fewer than two categories
  are trained.
- HowToBuyPesadelosNaiveBayesResponse is registered once instead of twice.
- The blob ids on the `index` lines were not recomputed for the revised contents.

 .../HowToBuyPesadelosNaiveBayesResponse.kt    |  18 +++
 .../sparklypower/SparklyNaiveBayes.kt         | 119 +++++++++++++++++++
 .../sparklypower/SparklyNaiveBayesResponse.kt |  41 +++++++
 .../sparklypower/SparklyPowerResponses.kt     |  10 +-
 .../loritta/helper/utils/NaiveBayes.kt        |  76 ++++++++++++
 5 files changed, 263 insertions(+), 1 deletion(-)
 create mode 100644 src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/HowToBuyPesadelosNaiveBayesResponse.kt
 create mode 100644 src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyNaiveBayes.kt
 create mode 100644 src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyNaiveBayesResponse.kt
 create mode 100644 src/main/kotlin/net/perfectdreams/loritta/helper/utils/NaiveBayes.kt

diff --git a/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/HowToBuyPesadelosNaiveBayesResponse.kt b/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/HowToBuyPesadelosNaiveBayesResponse.kt
new file mode 100644
index 0000000..97e4512
--- /dev/null
+++ b/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/HowToBuyPesadelosNaiveBayesResponse.kt
@@ -0,0 +1,18 @@
+package net.perfectdreams.loritta.helper.serverresponses.sparklypower
+
+import net.perfectdreams.loritta.api.messages.LorittaReply
+
+/**
+ * Replies with the SparklyPower store link; the message matching is inherited from
+ * [SparklyNaiveBayesResponse] using [SparklyNaiveBayes.QuestionCategory.BUY_PESADELOS].
+ */
+class HowToBuyPesadelosNaiveBayesResponse(sparklyNaiveBayes: SparklyNaiveBayes) : SparklyNaiveBayesResponse(SparklyNaiveBayes.QuestionCategory.BUY_PESADELOS, sparklyNaiveBayes) {
+    override fun getResponse(message: String): List<LorittaReply> {
+        return listOf(
+            LorittaReply(
+                "Você pode comprar pesadelos acessando o meu website! https://sparklypower.net/loja",
+                "<:pantufa_coffee:853048446981111828>"
+            )
+        )
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyNaiveBayes.kt b/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyNaiveBayes.kt
new file mode 100644
index 0000000..d37748c
--- /dev/null
+++ b/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyNaiveBayes.kt
@@ -0,0 +1,119 @@
+package net.perfectdreams.loritta.helper.serverresponses.sparklypower
+
+import net.perfectdreams.loritta.helper.utils.NaiveBayes
+import java.text.Normalizer
+
+/**
+ * Naive Bayes classifier for SparklyPower support questions, together with the text clean-up
+ * helpers used on the training phrases and on the messages to classify.
+ */
+class SparklyNaiveBayes {
+    val classifier = NaiveBayes<QuestionCategory>()
+
+    /**
+     * Trains [classifier] with the built-in example phrases of every [QuestionCategory].
+     *
+     * The phrases go through the private [train] helper so they are normalized the same way as
+     * the messages classified later (for example, "é" is stored as "e").
+     */
+    fun setup() {
+        train(
+            QuestionCategory.BUY_PESADELOS,
+            listOf(
+                "como ganho pesadelos",
+                "como consigo pesadelos",
+                "como compra pesadelos"
+            )
+        )
+
+        train(
+            QuestionCategory.SPARKLY_IP,
+            listOf(
+                "qual é o IP do SparklyPower"
+            )
+        )
+
+        train(
+            QuestionCategory.BUY_VIP,
+            listOf(
+                "como compra VIP no SparklyPower"
+            )
+        )
+    }
+
+    /** Normalizes [documents] with [normalizeNaiveBayesInput] and trains [classifier] with them. */
+    private fun train(category: QuestionCategory, documents: List<String>) = classifier.train(
+        category, documents.map { normalizeNaiveBayesInput(it) }
+    )
+
+    /**
+     * Scratch experiment: trains a separate, local classifier with sample phrases.
+     * Nothing is returned and the [classifier] property is left untouched.
+     */
+    fun main() {
+        val documents = listOf(
+            Pair("qual é o IP do SparklyPower", "ip"),
+            Pair("manda o IP do SparklyPower", "ip"),
+            Pair("como comprar VIP", "vip"),
+            Pair("quero comprar VIP", "vip"),
+            Pair("como eu protejo um terreno?", "terreno"),
+            Pair("como proteger um terreno?", "claim"),
+        )
+
+        val classifier = NaiveBayes<String>()
+        classifier.train(
+            "pesadelos",
+            listOf(
+                "como ganho pesadelos",
+                "como consigo pesadelos",
+                "como compra pesadelos",
+                "como comprar pesadelos",
+                "como posso comprar pesadelos"
+            ).map { normalizeNaiveBayesInput(it) }
+        )
+        classifier.train(documents)
+    }
+
+    /**
+     * Expands abbreviations and common misspellings into the words used by the training phrases.
+     * Matching is case-insensitive and only replaces whole words.
+     */
+    fun replaceShortenedWordsWithLongWords(source: String) = SHORTENED_WORD_REPLACEMENTS
+        .fold(source) { text, (pattern, replacement) -> text.replace(pattern, replacement) }
+
+    /** Strips accents and the characters `?`, `!`, `.` and `,` from [source], then trims it. */
+    fun normalizeNaiveBayesInput(source: String) = source
+        .normalize()
+        .replace("?", "")
+        .replace("!", "")
+        .replace(".", "")
+        .replace(",", "")
+        .trim()
+
+    /** Decomposes the string (NFD) and removes the combining diacritical marks. */
+    private fun String.normalize(): String {
+        val normalizedString = Normalizer.normalize(this, Normalizer.Form.NFD)
+        return COMBINING_MARKS.replace(normalizedString, "")
+    }
+
+    enum class QuestionCategory {
+        BUY_PESADELOS,
+        BUY_VIP,
+        SPARKLY_IP
+    }
+
+    private companion object {
+        private val COMBINING_MARKS = "\\p{InCombiningDiacriticalMarks}+".toRegex()
+
+        // Whole-word, case-insensitive patterns and their replacements, applied in this order
+        private val SHORTENED_WORD_REPLACEMENTS = listOf(
+            "Sparkly" to "SparklyPower",
+            "servidor" to "SparklyPower",
+            "server" to "SparklyPower",
+            "pesa" to "pesadelos",
+            "eh" to "é",
+            "adissiona" to "adiciona",
+            "adissiono" to "adiciono"
+        ).map { (word, replacement) -> Regex("\\b$word\\b", RegexOption.IGNORE_CASE) to replacement }
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyNaiveBayesResponse.kt b/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyNaiveBayesResponse.kt
new file mode 100644
index 0000000..bce195c
--- /dev/null
+++ b/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyNaiveBayesResponse.kt
@@ -0,0 +1,41 @@
+package net.perfectdreams.loritta.helper.serverresponses.sparklypower
+
+import mu.KotlinLogging
+import net.perfectdreams.loritta.helper.serverresponses.LorittaResponse
+
+/**
+ * Base class for responses that fire when [sparklyNaiveBayes] classifies a message as [category].
+ */
+abstract class SparklyNaiveBayesResponse(
+    private val category: SparklyNaiveBayes.QuestionCategory,
+    private val sparklyNaiveBayes: SparklyNaiveBayes
+) : LorittaResponse {
+    private val logger = KotlinLogging.logger {}
+
+    /**
+     * Returns `true` when [category] is the most likely class for [message], its probability is
+     * at least 0.4 and it leads the runner-up by at least 0.2.
+     */
+    override fun handleResponse(message: String): Boolean {
+        val normalizedMessage = sparklyNaiveBayes.normalizeNaiveBayesInput(sparklyNaiveBayes.replaceShortenedWordsWithLongWords(message))
+
+        val classifications = sparklyNaiveBayes.classifier.detailedClassification(normalizedMessage)
+            .entries
+            .sortedBy { it.value }
+
+        logger.info { "Results for $normalizedMessage: $classifications" }
+
+        // Get the best classification that matches our message (there is none if nothing was trained)
+        val bestMatch = classifications.lastOrNull() ?: return false
+        // Not the same category? Bail out!
+        if (bestMatch.key != category)
+            return false
+
+        // With a single trained category there is no runner-up, so its probability counts as zero
+        val secondBestProbability = classifications.getOrNull(classifications.size - 2)?.value ?: 0.0
+        val diffBetweenBestMatchAndSecondBestMatch = bestMatch.value - secondBestProbability
+
+        // We compare between the second best because if two questions are very similar, then the question is a bit confusing
+        return bestMatch.value >= 0.4 && diffBetweenBestMatchAndSecondBestMatch >= 0.2
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyPowerResponses.kt b/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyPowerResponses.kt
index 10fb049..7844bcd 100644
--- a/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyPowerResponses.kt
+++ b/src/main/kotlin/net/perfectdreams/loritta/helper/serverresponses/sparklypower/SparklyPowerResponses.kt
@@ -4,6 +4,12 @@ package net.perfectdreams.loritta.helper.serverresponses.sparklypower
  * Class holding a list containing all Loritta Helper's automatic responses (Portuguese)
  */
 object SparklyPowerResponses {
+    val sparklyNaiveBayes = SparklyNaiveBayes()
+
+    init {
+        sparklyNaiveBayes.setup()
+    }
+
     val responses = listOf(
         ServerInformationResponse(),
         HowToBuyPesadelosResponse(),
@@ -12,6 +18,8 @@ object SparklyPowerResponses {
         HowToRegisterResponse(),
         HowToResetPasswordResponse(),
         HowToTransferSonhosResponse(),
-        HowToVoteResponse()
+        HowToVoteResponse(),
+
+        HowToBuyPesadelosNaiveBayesResponse(sparklyNaiveBayes)
     ).sortedByDescending { it.priority }
 }
diff --git a/src/main/kotlin/net/perfectdreams/loritta/helper/utils/NaiveBayes.kt b/src/main/kotlin/net/perfectdreams/loritta/helper/utils/NaiveBayes.kt
new file mode 100644
index 0000000..c8a6aab
--- /dev/null
+++ b/src/main/kotlin/net/perfectdreams/loritta/helper/utils/NaiveBayes.kt
@@ -0,0 +1,76 @@
+package net.perfectdreams.loritta.helper.utils
+
+import kotlin.math.exp
+import kotlin.math.ln
+
+// Thanks ChatGPT
+/**
+ * Multinomial Naive Bayes text classifier with add-one (Laplace) smoothing.
+ *
+ * Documents are split on whitespace and lowercased; every non-empty token counts as one word.
+ *
+ * @param CATEGORYTYPE type of the labels that documents are classified into
+ */
+class NaiveBayes<CATEGORYTYPE> {
+    private val classCounts: MutableMap<CATEGORYTYPE, Int> = HashMap()
+    val wordCounts: MutableMap<CATEGORYTYPE, MutableMap<String, Int>> = HashMap()
+    private var totalDocuments: Int = 0
+
+    /** Trains the classifier with [documents] that all belong to [category]. */
+    fun train(category: CATEGORYTYPE, documents: List<String>) = train(
+        documents.map { it to category }
+    )
+
+    /** Trains the classifier with `(text, label)` pairs. */
+    fun train(documents: List<Pair<String, CATEGORYTYPE>>) {
+        for ((text, label) in documents) {
+            classCounts[label] = classCounts.getOrDefault(label, 0) + 1
+            totalDocuments++
+
+            val labelWordCounts = wordCounts.getOrPut(label) { HashMap() }
+            for (word in tokenize(text)) {
+                labelWordCounts[word] = labelWordCounts.getOrDefault(word, 0) + 1
+            }
+        }
+    }
+
+    /**
+     * Returns the most likely label for [text].
+     *
+     * @throws NoSuchElementException if nothing has been trained yet
+     */
+    fun classify(text: String) = detailedClassification(text).entries.maxBy { it.value }.key
+
+    /**
+     * Returns the probability of every trained label for [text]; the probabilities add up to 1.
+     * The result is empty when nothing has been trained yet.
+     */
+    fun detailedClassification(text: String): Map<CATEGORYTYPE, Double> {
+        val words = tokenize(text)
+        // Add-one smoothing adds the vocabulary size (distinct words over all labels) to the denominator
+        val vocabularySize = wordCounts.values.flatMapTo(HashSet<String>()) { it.keys }.size.coerceAtLeast(1)
+
+        val logProbabilities = classCounts.mapValues { (label, documentCount) ->
+            val labelWordCounts = wordCounts[label].orEmpty()
+            val smoothedTotal = (labelWordCounts.values.sum() + vocabularySize).toDouble()
+
+            ln(documentCount.toDouble() / totalDocuments) +
+                words.sumOf { word -> ln((labelWordCounts.getOrDefault(word, 0) + 1) / smoothedTotal) }
+        }
+
+        // Convert log probabilities to normal probabilities, relative to the largest one
+        val maxLogProbability = logProbabilities.values.maxOrNull() ?: return emptyMap()
+        val unnormalized = logProbabilities.mapValues { exp(it.value - maxLogProbability) }
+
+        // Normalize the probabilities
+        val sumProbabilities = unnormalized.values.sum()
+        return unnormalized.mapValues { it.value / sumProbabilities }
+    }
+
+    /** Splits [text] on whitespace, lowercases the tokens and drops the empty ones. */
+    private fun tokenize(text: String) = text.split(WHITESPACE).filter { it.isNotEmpty() }.map { it.lowercase() }
+
+    private companion object {
+        private val WHITESPACE = "\\s+".toRegex()
+    }
+}
\ No newline at end of file