diff --git a/modules/ratings_data_cleaner/main.py b/modules/ratings_data_cleaner/main.py index 59c1fa3..f0a213b 100644 --- a/modules/ratings_data_cleaner/main.py +++ b/modules/ratings_data_cleaner/main.py @@ -122,6 +122,7 @@ def _create_quality_review_table(self, df: pd.DataFrame) -> pd.DataFrame: df["value"] = df["value"].replace(r"[^A-Za-z0-9 ]+", "", regex=True) df["value"] = df["value"].str.lower().apply(lambda x: filter_stopwords(x)) + df["value"] = df["value"].str.replace(" ", " ") df["quality_review"] = df["value"].apply(evaluate_quality_words_over_thresh) df = df[df["quality_review"] == True]