From 8f4e3739982b26c0c9ba1bcceb24835b1d7de6c0 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Mon, 9 Dec 2024 15:49:59 +0100 Subject: [PATCH] :rotating_light: Code linting --- docs/tutorials/08_MelusineRegex.md | 4 - melusine/processors.py | 1 + tests/detectors/test_thanks_detector.py | 5 +- .../detectors/test_vacation_reply_detector.py | 6 +- tests/functional/test_emails_fixtures.py | 122 ++++++++++---- tests/gmail/test_gmail.py | 3 +- .../processors/test_content_refined_tagger.py | 149 ++++++++++++++---- tests/processors/test_processors.py | 44 ++++-- 8 files changed, 244 insertions(+), 90 deletions(-) diff --git a/docs/tutorials/08_MelusineRegex.md b/docs/tutorials/08_MelusineRegex.md index 46a5105..e974b79 100644 --- a/docs/tutorials/08_MelusineRegex.md +++ b/docs/tutorials/08_MelusineRegex.md @@ -17,7 +17,6 @@ from melusine.base import MelusineRegex class AnnoyingEmailsRegex(MelusineRegex): - @property def positive(self) -> Union[str, Dict[str, str]]: return dict( @@ -65,7 +64,6 @@ from melusine.base import MelusineRegex class AnnoyingEmailsRegex(MelusineRegex): - @property def positive(self) -> Union[str, Dict[str, str]]: return dict( @@ -192,7 +190,6 @@ from melusine.base import MelusineRegex class AnnoyingEmailsRegex(MelusineRegex): - @property def positive(self) -> Union[str, Dict[str, str]]: return dict( @@ -237,7 +234,6 @@ That is were neutral regex can be of use. Whenever a neutral regex is matched, i ```python class IfritAlertRegex(MelusineRegex): - @property def positive(self) -> Union[str, Dict[str, str]]: return dict( diff --git a/melusine/processors.py b/melusine/processors.py index 0e989f4..06d54bd 100644 --- a/melusine/processors.py +++ b/melusine/processors.py @@ -1556,6 +1556,7 @@ class RefinedTagger(MelusineTransformer): """ Post-processing class to refine initial tags. """ + def __init__( self, input_columns: str = "messages", diff --git a/tests/detectors/test_thanks_detector.py b/tests/detectors/test_thanks_detector.py index da50073..fb6d16f 100644 --- a/tests/detectors/test_thanks_detector.py +++ b/tests/detectors/test_thanks_detector.py @@ -111,10 +111,7 @@ def test_thanks_detector_missing_field(thanks_detector_df): ], False, "Merci\nMerci a vous", - [ - {"base_text": "Merci", "base_tag": "THANKS"}, - {"base_text": "Merci a vous", "base_tag": "THANKS"} - ], + [{"base_text": "Merci", "base_tag": "THANKS"}, {"base_text": "Merci a vous", "base_tag": "THANKS"}], ), ], ) diff --git a/tests/detectors/test_vacation_reply_detector.py b/tests/detectors/test_vacation_reply_detector.py index 7594522..998d264 100644 --- a/tests/detectors/test_vacation_reply_detector.py +++ b/tests/detectors/test_vacation_reply_detector.py @@ -35,7 +35,7 @@ def test_instanciation(): {"base_tag": "HELLO", "base_text": "Bonjour,"}, { "base_tag": "BODY", - "base_text": "je vous confirme l'annulation du rdv du 01/01/2022 à 16h." + "base_text": "je vous confirme l'annulation du rdv du 01/01/2022 à 16h.", }, {"base_tag": "GREETINGS", "base_text": "Bien cordialement, John Smith."}, ], @@ -58,7 +58,7 @@ def test_instanciation(): {"base_tag": "HELLO", "base_text": "Bonjour,"}, { "base_tag": "BODY", - "base_text": "Actuellement en conge je prendrai connaissance de votre message ulterieurement." + "base_text": "Actuellement en conge je prendrai connaissance de votre message ulterieurement.", }, {"base_tag": "GREETINGS", "base_text": "Cordialement, "}, ], @@ -101,7 +101,7 @@ def test_transform(df, good_result): {"base_tag": "HELLO", "base_text": "Bonjour,"}, { "base_tag": "BODY", - "base_text": "Actuellement en conge je prendrai connaissance de votre message ulterieurement." + "base_text": "Actuellement en conge je prendrai connaissance de votre message ulterieurement.", }, {"base_tag": "GREETINGS", "base_text": "Cordialement, "}, ], diff --git a/tests/functional/test_emails_fixtures.py b/tests/functional/test_emails_fixtures.py index 934a730..0fd662d 100644 --- a/tests/functional/test_emails_fixtures.py +++ b/tests/functional/test_emails_fixtures.py @@ -129,7 +129,11 @@ [ {"base_text": "Bonjour,", "base_tag": "HELLO", "base_tag_list": ["HELLO"]}, {"base_text": "Vous trouverez ci-joint l'attestation", "base_tag": "BODY", "base_tag_list": ["BODY"]}, - {"base_text": "Merci de me confirmer la bonne réception de ce message.", "base_tag": "BODY", "base_tag_list": ["BODY"]}, + { + "base_text": "Merci de me confirmer la bonne réception de ce message.", + "base_tag": "BODY", + "base_tag_list": ["BODY"], + }, {"base_text": "Vous en remerciant par avance.", "base_tag": "THANKS", "base_tag_list": ["THANKS"]}, {"base_text": "Cordialement,", "base_tag": "GREETINGS", "base_tag_list": ["GREETINGS"]}, {"base_text": "Jean Dupont", "base_tag": "BODY", "base_tag_list": ["BODY"]}, @@ -137,7 +141,11 @@ [ {"base_text": "Bonjour,", "base_tag": "HELLO", "base_tag_list": ["HELLO"]}, {"base_text": "Veuillez trouver ci-jointe la lettre", "base_tag": "BODY", "base_tag_list": ["BODY"]}, - {"base_text": "La visualisation des fichiers PDF nécessite Adobe Reader.", "base_tag": "FOOTER", "base_tag_list": ["FOOTER"]}, + { + "base_text": "La visualisation des fichiers PDF nécessite Adobe Reader.", + "base_tag": "FOOTER", + "base_tag_list": ["FOOTER"], + }, {"base_text": "Sentiments mutualistes.", "base_tag": "GREETINGS", "base_tag_list": ["GREETINGS"]}, {"base_text": "La MAIF", "base_tag": "BODY", "base_tag_list": ["BODY"]}, ], @@ -147,27 +155,63 @@ "messages.tags": [ [ {"base_text": "Bonjour,", "base_tag": "HELLO", "base_tag_list": ["HELLO"], "refined_tag": "HELLO"}, - {"base_text": "Vous trouverez ci-joint l'attestation", "base_tag": "BODY", "base_tag_list": ["BODY"], - "refined_tag": "BODY"}, - {"base_text": "Merci de me confirmer la bonne réception de ce message.", "base_tag": "BODY", - "base_tag_list": ["BODY"], "refined_tag": "BODY"}, - {"base_text": "Vous en remerciant par avance.", "base_tag": "THANKS", "base_tag_list": ["THANKS"], - "refined_tag": "THANKS"}, - {"base_text": "Cordialement,", "base_tag": "GREETINGS", "base_tag_list": ["GREETINGS"], - "refined_tag": "GREETINGS"}, - {"base_text": "Jean Dupont", "base_tag": "BODY", "base_tag_list": ["BODY"], - "refined_tag": "SIGNATURE_NAME"}, + { + "base_text": "Vous trouverez ci-joint l'attestation", + "base_tag": "BODY", + "base_tag_list": ["BODY"], + "refined_tag": "BODY", + }, + { + "base_text": "Merci de me confirmer la bonne réception de ce message.", + "base_tag": "BODY", + "base_tag_list": ["BODY"], + "refined_tag": "BODY", + }, + { + "base_text": "Vous en remerciant par avance.", + "base_tag": "THANKS", + "base_tag_list": ["THANKS"], + "refined_tag": "THANKS", + }, + { + "base_text": "Cordialement,", + "base_tag": "GREETINGS", + "base_tag_list": ["GREETINGS"], + "refined_tag": "GREETINGS", + }, + { + "base_text": "Jean Dupont", + "base_tag": "BODY", + "base_tag_list": ["BODY"], + "refined_tag": "SIGNATURE_NAME", + }, ], [ {"base_text": "Bonjour,", "base_tag": "HELLO", "base_tag_list": ["HELLO"], "refined_tag": "HELLO"}, - {"base_text": "Veuillez trouver ci-jointe la lettre", "base_tag": "BODY", "base_tag_list": ["BODY"], - "refined_tag": "BODY"}, - {"base_text": "La visualisation des fichiers PDF nécessite Adobe Reader.", "base_tag": "FOOTER", - "base_tag_list": ["FOOTER"], "refined_tag": "FOOTER"}, - {"base_text": "Sentiments mutualistes.", "base_tag": "GREETINGS", "base_tag_list": ["GREETINGS"], - "refined_tag": "GREETINGS"}, - {"base_text": "La MAIF", "base_tag": "BODY", "base_tag_list": ["BODY"], - "refined_tag": "SIGNATURE_NAME"}, + { + "base_text": "Veuillez trouver ci-jointe la lettre", + "base_tag": "BODY", + "base_tag_list": ["BODY"], + "refined_tag": "BODY", + }, + { + "base_text": "La visualisation des fichiers PDF nécessite Adobe Reader.", + "base_tag": "FOOTER", + "base_tag_list": ["FOOTER"], + "refined_tag": "FOOTER", + }, + { + "base_text": "Sentiments mutualistes.", + "base_tag": "GREETINGS", + "base_tag_list": ["GREETINGS"], + "refined_tag": "GREETINGS", + }, + { + "base_text": "La MAIF", + "base_tag": "BODY", + "base_tag_list": ["BODY"], + "refined_tag": "SIGNATURE_NAME", + }, ], ], }, @@ -202,7 +246,11 @@ "messages.tags": [ [ {"base_text": "Bonjour", "base_tag": "HELLO", "base_tag_list": ["HELLO"]}, - {"base_text": "Pouvez-vous me transmettre deux attestations au nom de mes enfants", "base_tag": "BODY", "base_tag_list": ["BODY"]}, + { + "base_text": "Pouvez-vous me transmettre deux attestations au nom de mes enfants", + "base_tag": "BODY", + "base_tag_list": ["BODY"], + }, {"base_text": "- Jane Dupond", "base_tag": "BODY", "base_tag_list": ["BODY"]}, {"base_text": "- Joe Dupond", "base_tag": "BODY", "base_tag_list": ["BODY"]}, {"base_text": "Merci par avance", "base_tag": "THANKS", "base_tag_list": ["THANKS"]}, @@ -215,16 +263,32 @@ "messages.tags": [ [ {"base_text": "Bonjour", "base_tag": "HELLO", "base_tag_list": ["HELLO"], "refined_tag": "HELLO"}, - {"base_text": "Pouvez-vous me transmettre deux attestations au nom de mes enfants", "base_tag": "BODY", - "base_tag_list": ["BODY"], "refined_tag": "BODY"}, + { + "base_text": "Pouvez-vous me transmettre deux attestations au nom de mes enfants", + "base_tag": "BODY", + "base_tag_list": ["BODY"], + "refined_tag": "BODY", + }, {"base_text": "- Jane Dupond", "base_tag": "BODY", "base_tag_list": ["BODY"], "refined_tag": "BODY"}, {"base_text": "- Joe Dupond", "base_tag": "BODY", "base_tag_list": ["BODY"], "refined_tag": "BODY"}, - {"base_text": "Merci par avance", "base_tag": "THANKS", "base_tag_list": ["THANKS"], - "refined_tag": "THANKS"}, - {"base_text": "Cordialement", "base_tag": "GREETINGS", "base_tag_list": ["GREETINGS"], - "refined_tag": "GREETINGS"}, - {"base_text": "Mr Jean Dupond", "base_tag": "BODY", "base_tag_list": ["BODY"], - "refined_tag": "SIGNATURE_NAME"}, + { + "base_text": "Merci par avance", + "base_tag": "THANKS", + "base_tag_list": ["THANKS"], + "refined_tag": "THANKS", + }, + { + "base_text": "Cordialement", + "base_tag": "GREETINGS", + "base_tag_list": ["GREETINGS"], + "refined_tag": "GREETINGS", + }, + { + "base_text": "Mr Jean Dupond", + "base_tag": "BODY", + "base_tag_list": ["BODY"], + "refined_tag": "SIGNATURE_NAME", + }, ] ], }, diff --git a/tests/gmail/test_gmail.py b/tests/gmail/test_gmail.py index 05d23e5..f4498db 100644 --- a/tests/gmail/test_gmail.py +++ b/tests/gmail/test_gmail.py @@ -3,11 +3,10 @@ import pytest import pandas as pd -from unittest.mock import MagicMock, patch - HttpRequestMock = pytest.importorskip('googleapiclient.http.HttpRequestMock') from google.oauth2.credentials import Credentials +from unittest.mock import MagicMock, patch from melusine.connectors.gmail import GmailConnector diff --git a/tests/processors/test_content_refined_tagger.py b/tests/processors/test_content_refined_tagger.py index 0091417..5fe58a4 100644 --- a/tests/processors/test_content_refined_tagger.py +++ b/tests/processors/test_content_refined_tagger.py @@ -3,14 +3,14 @@ import pytest from melusine.message import Message -from melusine.processors import BaseContentTagger, ContentTagger, Tag, RefinedTagger +from melusine.processors import BaseContentTagger, ContentTagger, RefinedTagger, Tag def test_content_tagger(): # Text segments (= individual messages in an email conversation) text_segments = [ "Envoye de mon iphone", - ("Bonjour Mme X,\nSuite a blh blah blah\nBien cordialement\nJane Dupond\n(See attached file: flex.jpg)"), + "Bonjour Mme X,\nSuite a blh blah blah\nBien cordialement\nJane Dupond\n(See attached file: flex.jpg)", ( "Bonjour,\nVeuillez trouver ci-joint blah\n" "Merci d'avance,\nCordialement,\n" @@ -35,8 +35,10 @@ def test_content_tagger(): {"base_text": "Veuillez trouver ci-joint blah", "base_tag": "BODY"}, {"base_text": "Merci d'avance,", "base_tag": "THANKS"}, {"base_text": "Cordialement,", "base_tag": "GREETINGS"}, - {"base_text": "Toute modification, edition, utilisation ou diffusion non autorisee est interdite", - "base_tag": "FOOTER"}, + { + "base_text": "Toute modification, edition, utilisation ou diffusion non autorisee est interdite", + "base_tag": "FOOTER", + }, ], ] @@ -125,12 +127,18 @@ def test_content_tagger_split_text(text, expected_parts): ), [ {"base_text": "Bonjour,", "base_tag": "HELLO", "refined_tag": "HELLO"}, - {"base_text": "Suite a notre intervention du 16.02.22 , un taux d'humidité de 50% a été relevé.", - "base_tag": "BODY", "refined_tag": "BODY"}, + { + "base_text": "Suite a notre intervention du 16.02.22 , un taux d'humidité de 50% a été relevé.", + "base_tag": "BODY", + "refined_tag": "BODY", + }, {"base_text": "Cordialement.", "base_tag": "GREETINGS", "refined_tag": "GREETINGS"}, {"base_text": "177, rue de la fée - 75000 Paris.", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, - {"base_text": "Horaires : du lundi au jeudi de 08h00 à 16h30 et le vendredi de 08h00 à 16h00.", - "base_tag": "BODY", "refined_tag": "BODY"}, + { + "base_text": "Horaires : du lundi au jeudi de 08h00 à 16h30 et le vendredi de 08h00 à 16h00.", + "base_tag": "BODY", + "refined_tag": "BODY", + }, {"base_text": "Tel : 01.45.53.11.33", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, ], ), @@ -143,7 +151,11 @@ def test_content_tagger_split_text(text, expected_parts): ), [ {"base_text": "bonjour", "base_tag": "HELLO", "refined_tag": "HELLO"}, - {"base_text": "15 jours après les premières réparations, un défaut a été détecté.", "base_tag": "BODY", "refined_tag": "BODY"}, + { + "base_text": "15 jours après les premières réparations, un défaut a été détecté.", + "base_tag": "BODY", + "refined_tag": "BODY", + }, {"base_text": "Bien à vous", "base_tag": "GREETINGS", "refined_tag": "GREETINGS"}, {"base_text": "Britney Spears", "base_tag": "BODY", "refined_tag": "SIGNATURE_NAME"}, ], @@ -173,7 +185,9 @@ def test_content_tagger_split_text(text, expected_parts): [ { "base_text": "Merci de me faire suivre les docs à ma nouvelle adresse qui est 0 rue du parc, 75000 Paris.", - "base_tag": "BODY", "refined_tag": "BODY"}, + "base_tag": "BODY", + "refined_tag": "BODY", + }, {"base_text": "Merci d'avance.", "base_tag": "THANKS", "refined_tag": "THANKS"}, {"base_text": "Acceptez notre salutation,", "base_tag": "GREETINGS", "refined_tag": "GREETINGS"}, ], @@ -194,7 +208,11 @@ def test_content_tagger_split_text(text, expected_parts): ), [ {"base_text": "Bonjour", "base_tag": "HELLO", "refined_tag": "HELLO"}, - {"base_text": "Je vous relance concernant ma télévision avec le devis en PJ.", "base_tag": "BODY", "refined_tag": "BODY"}, + { + "base_text": "Je vous relance concernant ma télévision avec le devis en PJ.", + "base_tag": "BODY", + "refined_tag": "BODY", + }, {"base_text": "Désolé pour la qualité.", "base_tag": "BODY", "refined_tag": "BODY"}, {"base_text": "Je l'ai envoyé à partir de mon ordi.", "base_tag": "BODY", "refined_tag": "BODY"}, {"base_text": "Excellente journée à vous,", "base_tag": "HELLO", "refined_tag": "HELLO"}, @@ -220,7 +238,11 @@ def test_content_tagger_split_text(text, expected_parts): ( "Impeccable, je vous remercie beaucoup pour votre rapidité.\nObtenir\nOutlook pour Android", [ - {"base_text": "Impeccable, je vous remercie beaucoup pour votre rapidité.", "base_tag": "THANKS", "refined_tag": "THANKS"}, + { + "base_text": "Impeccable, je vous remercie beaucoup pour votre rapidité.", + "base_tag": "THANKS", + "refined_tag": "THANKS", + }, {"base_text": "Obtenir", "base_tag": "FOOTER", "refined_tag": "FOOTER"}, {"base_text": "Outlook pour Android", "base_tag": "FOOTER", "refined_tag": "FOOTER"}, ], @@ -232,7 +254,11 @@ def test_content_tagger_split_text(text, expected_parts): ), [ {"base_text": "Cher Monsieur,", "base_tag": "HELLO", "refined_tag": "HELLO"}, - {"base_text": "Je vous confirme la bonne réception de votre précédent email.", "base_tag": "BODY", "refined_tag": "BODY"}, + { + "base_text": "Je vous confirme la bonne réception de votre précédent email.", + "base_tag": "BODY", + "refined_tag": "BODY", + }, {"base_text": "Je vous en remercie.", "base_tag": "THANKS", "refined_tag": "THANKS"}, {"base_text": "Bien cordialement,", "base_tag": "GREETINGS", "refined_tag": "GREETINGS"}, {"base_text": "John Smith", "base_tag": "BODY", "refined_tag": "SIGNATURE_NAME"}, @@ -250,10 +276,16 @@ def test_content_tagger_split_text(text, expected_parts): {"base_text": "URGENT URGENT", "base_tag": "BODY", "refined_tag": "BODY"}, { "base_text": "Merci de me faire suivre les docs à ma nouvelle adresse qui est 0 rue du parc, 75000 Paris.", - "base_tag": "BODY", "refined_tag": "BODY"}, + "base_tag": "BODY", + "refined_tag": "BODY", + }, {"base_text": "Merci d'avance.", "base_tag": "THANKS", "refined_tag": "THANKS"}, {"base_text": "Recevez nos salutations,", "base_tag": "GREETINGS", "refined_tag": "GREETINGS"}, - {"base_text": "Vous en souhaitant bonne réception", "base_tag": "GREETINGS", "refined_tag": "GREETINGS"}, + { + "base_text": "Vous en souhaitant bonne réception", + "base_tag": "GREETINGS", + "refined_tag": "GREETINGS", + }, ], ), pytest.param( @@ -303,7 +335,11 @@ def test_content_tagger_split_text(text, expected_parts): ( "\nBonjour Monsieur Stanislas von den hoeggenboord\n\nbien à toi\nJ. Smith\nChargé de clientèle", [ - {"base_text": "Bonjour Monsieur Stanislas von den hoeggenboord", "base_tag": "HELLO", "refined_tag": "HELLO"}, + { + "base_text": "Bonjour Monsieur Stanislas von den hoeggenboord", + "base_tag": "HELLO", + "refined_tag": "HELLO", + }, {"base_text": "bien à toi", "base_tag": "GREETINGS", "refined_tag": "GREETINGS"}, {"base_text": "J. Smith", "base_tag": "BODY", "refined_tag": "SIGNATURE_NAME"}, {"base_text": "Chargé de clientèle", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, @@ -323,7 +359,11 @@ def test_content_tagger_split_text(text, expected_parts): {"base_text": "5bis rue Patrick Sebastien", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, {"base_text": "6-8 cours mirabeau", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, {"base_text": "7 ter place du dahu", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, - {"base_text": "8 de la rue très longue qui ne doit pas être taggée signature", "base_tag": "BODY", "refined_tag": "BODY"}, + { + "base_text": "8 de la rue très longue qui ne doit pas être taggée signature", + "base_tag": "BODY", + "refined_tag": "BODY", + }, ], ), ( @@ -339,7 +379,9 @@ def test_content_tagger_split_text(text, expected_parts): {"base_text": "Bonjour,", "base_tag": "HELLO", "refined_tag": "HELLO"}, { "base_text": "Je vous informe que je vais accepter la proposition de L , à savoir le paiement d'une indemnité forfaitaire de résiliation du CCMI de 4000 € TTC pour clore cette affaire.", - "base_tag": "BODY", "refined_tag": "BODY"}, + "base_tag": "BODY", + "refined_tag": "BODY", + }, {"base_text": "Cordialement.", "base_tag": "GREETINGS", "refined_tag": "GREETINGS"}, {"base_text": "Bob Smith", "base_tag": "BODY", "refined_tag": "SIGNATURE_NAME"}, ], @@ -359,13 +401,23 @@ def test_content_tagger_split_text(text, expected_parts): ), [ {"base_text": "Monsieur Bob Smith", "base_tag": "HELLO", "refined_tag": "HELLO"}, - {"base_text": "Adresse mail : BobSmith90@gmail.com", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, + { + "base_text": "Adresse mail : BobSmith90@gmail.com", + "base_tag": "SIGNATURE", + "refined_tag": "SIGNATURE", + }, {"base_text": "Lucy Ange", "base_tag": "BODY", "refined_tag": "SIGNATURE_NAME"}, {"base_text": "Bonjour Monsieur,", "base_tag": "HELLO", "refined_tag": "HELLO"}, { "base_text": "Suite à notre entretien téléphonique de ce matin, et au message que vous m'avez envoyé sur ma messagerie, je voudrais effectuer la réparation du véhicule Renault Twingo dans un garage partenaire de la Maif situé, si c'est possible.", - "base_tag": "BODY", "refined_tag": "BODY"}, - {"base_text": "Dans l'attente de votre réponse et en vous remerciant par avance,", "base_tag": "BODY", "refined_tag": "BODY"}, + "base_tag": "BODY", + "refined_tag": "BODY", + }, + { + "base_text": "Dans l'attente de votre réponse et en vous remerciant par avance,", + "base_tag": "BODY", + "refined_tag": "BODY", + }, {"base_text": "Monsieur Bob Smith", "base_tag": "HELLO", "refined_tag": "HELLO"}, {"base_text": "Envoyé à partir de", "base_tag": "FOOTER", "refined_tag": "FOOTER"}, {"base_text": "Courrier", "base_tag": "FOOTER", "refined_tag": "FOOTER"}, @@ -395,12 +447,16 @@ def test_content_tagger_split_text(text, expected_parts): {"base_text": "J’espère que vous allez bien.", "base_tag": "BODY", "refined_tag": "BODY"}, { "base_text": "Pour faire suite à mon mail du 21 février 2023, je me permets de revenir vers vous pour avoir votre avis sur le devis que j’ai demandé auprès d’un enquêteur.", - "base_tag": "BODY", "refined_tag": "BODY"}, + "base_tag": "BODY", + "refined_tag": "BODY", + }, {"base_text": "Voici son retour :", "base_tag": "BODY", "refined_tag": "BODY"}, {"base_text": "Qu’en pensez-vous svp ?", "base_tag": "BODY", "refined_tag": "BODY"}, { "base_text": "Je reste à votre disposition pour tout complément d’information et vous remercie de l’intérêt que vous porterez à ma demande,", - "base_tag": "BODY", "refined_tag": "BODY"}, + "base_tag": "BODY", + "refined_tag": "BODY", + }, {"base_text": "Bien Cordialement,", "base_tag": "GREETINGS", "refined_tag": "GREETINGS"}, {"base_text": "Bob Smith", "base_tag": "BODY", "refined_tag": "SIGNATURE_NAME"}, {"base_text": "Tél.", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, @@ -417,12 +473,26 @@ def test_content_tagger_split_text(text, expected_parts): {"base_text": "cordialement", "base_tag": "GREETINGS", "refined_tag": "GREETINGS"}, {"base_text": "Contact e-mail", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, {"base_text": "Contact téléphone", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, - {"base_text": "01 23 45 67 89 / abcabc@hotmail.fr", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, - {"base_text": "Torroella de Montgri, le 5 avril 2023", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, + { + "base_text": "01 23 45 67 89 / abcabc@hotmail.fr", + "base_tag": "SIGNATURE", + "refined_tag": "SIGNATURE", + }, + { + "base_text": "Torroella de Montgri, le 5 avril 2023", + "base_tag": "SIGNATURE", + "refined_tag": "SIGNATURE", + }, { "base_text": "Les formats de fichiers acceptés sont : PDF, DOC, DOCX, JPEG, JPG, TIFF, TXT, ODT, XLS, XLSX", - "base_tag": "FOOTER", "refined_tag": "FOOTER"}, - {"base_text": "Tout autre format de fichiers ne sera pas transmis au dossier", "base_tag": "FOOTER", "refined_tag": "FOOTER"}, + "base_tag": "FOOTER", + "refined_tag": "FOOTER", + }, + { + "base_text": "Tout autre format de fichiers ne sera pas transmis au dossier", + "base_tag": "FOOTER", + "refined_tag": "FOOTER", + }, ], id="diverse_signature_patterns", ), @@ -439,12 +509,24 @@ def test_content_tagger_split_text(text, expected_parts): {"base_text": "J. Smith", "base_tag": "BODY", "refined_tag": "SIGNATURE_NAME"}, {"base_text": "01 23 45 67 89", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, {"base_text": "Secrétaire en charge des avions", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, - {"base_text": "Business Analyst – Tribu Sinistres – Squad Flux Entrants", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, - {"base_text": "Société nationale des chemins de fer", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, + { + "base_text": "Business Analyst – Tribu Sinistres – Squad Flux Entrants", + "base_tag": "SIGNATURE", + "refined_tag": "SIGNATURE", + }, + { + "base_text": "Société nationale des chemins de fer", + "base_tag": "SIGNATURE", + "refined_tag": "SIGNATURE", + }, {"base_text": "Conseiller MAIF", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, {"base_text": "Gestionnaire sinistre - C99G", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, {"base_text": "Service des lettres anonymes", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, - {"base_text": "Technicienne de gestion - EQUIPE ABC", "base_tag": "SIGNATURE", "refined_tag": "SIGNATURE"}, + { + "base_text": "Technicienne de gestion - EQUIPE ABC", + "base_tag": "SIGNATURE", + "refined_tag": "SIGNATURE", + }, ], id="signature_jobs", ), @@ -457,8 +539,11 @@ def test_content_tagger_split_text(text, expected_parts): {"base_text": "bonjour", "base_tag": "HELLO", "refined_tag": "HELLO"}, {"base_text": "mon body", "base_tag": "BODY", "refined_tag": "BODY"}, {"base_text": "Cordialement", "base_tag": "GREETINGS", "refined_tag": "GREETINGS"}, - {"base_text": "analyste -------------------------------------- test test test test test test test", - "base_tag": "BODY", "refined_tag": "BODY"}, + { + "base_text": "analyste -------------------------------------- test test test test test test test", + "base_tag": "BODY", + "refined_tag": "BODY", + }, ], id="check_catastrophic_backtracking", ), diff --git a/tests/processors/test_processors.py b/tests/processors/test_processors.py index 03cac2a..00c1ca0 100644 --- a/tests/processors/test_processors.py +++ b/tests/processors/test_processors.py @@ -184,21 +184,33 @@ def test_text_extractor_error(): def test_text_extractor_multiple_messages(): """Unit test""" message_list = [ - Message(meta="", text="", tags=[ - {"base_text": "A", "base_tag": "BODY"}, - {"base_text": "G", "base_tag": "GREETINGS"}, - {"base_text": "A", "base_tag": "BODY"}, - ]), - Message(meta="", text="", tags=[ - {"base_text": "B", "base_tag": "BODY"}, - {"base_text": "B", "base_tag": "BODY"}, - {"base_text": "B", "base_tag": "BODY"}, - ]), - Message(meta="", text="", tags=[ - {"base_text": "G", "base_tag": "GREETINGS"}, - {"base_text": "C", "base_tag": "BODY"}, - {"base_text": "C", "base_tag": "BODY"}, - ]), + Message( + meta="", + text="", + tags=[ + {"base_text": "A", "base_tag": "BODY"}, + {"base_text": "G", "base_tag": "GREETINGS"}, + {"base_text": "A", "base_tag": "BODY"}, + ], + ), + Message( + meta="", + text="", + tags=[ + {"base_text": "B", "base_tag": "BODY"}, + {"base_text": "B", "base_tag": "BODY"}, + {"base_text": "B", "base_tag": "BODY"}, + ], + ), + Message( + meta="", + text="", + tags=[ + {"base_text": "G", "base_tag": "GREETINGS"}, + {"base_text": "C", "base_tag": "BODY"}, + {"base_text": "C", "base_tag": "BODY"}, + ], + ), ] expected_output = "A\nB\nB\nB" @@ -368,7 +380,7 @@ def test_date_processor(date_str: str, expected_iso_format: str) -> None: text="Envoyé depuis mon Iphone", tags=[ {"base_text": "Envoyé depuis mon Iphone", "base_tag": "FOOTER"}, - ] + ], ), Message( meta="De: test.test@test.fr \n"