diff --git a/.gitignore b/.gitignore index c65a17d..1f4259e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ __pycache__ .vscode py_readability_metrics.egg-info dist -build \ No newline at end of file +build +*.rsp \ No newline at end of file diff --git a/readability/readability.py b/readability/readability.py index 3a48d42..d6dfc19 100644 --- a/readability/readability.py +++ b/readability/readability.py @@ -15,41 +15,41 @@ def __init__(self, text, min_words=100): def ari(self): """Calculate Automated Readability Index (ARI).""" - return ARI(self._statistics, self._min_words).score() + return ARI(self._statistics, self._min_words).results() def coleman_liau(self): """Calculate Coleman Liau Index.""" - return ColemanLiau(self._statistics, self._min_words).score() + return ColemanLiau(self._statistics, self._min_words).results() def dale_chall(self): """Calculate Dale Chall.""" - return DaleChall(self._statistics, self._min_words).score() + return DaleChall(self._statistics, self._min_words).results() def flesch(self): """Calculate Flesch Reading Ease score.""" - return Flesch(self._statistics, self._min_words).score() + return Flesch(self._statistics, self._min_words).results() def flesch_kincaid(self): """Calculate Flesch-Kincaid Grade Level.""" - return FleschKincaid(self._statistics, self._min_words).score() + return FleschKincaid(self._statistics, self._min_words).results() def gunning_fog(self): """Calculate Gunning Fog score.""" - return GunningFog(self._statistics, self._min_words).score() + return GunningFog(self._statistics, self._min_words).results() def linsear_write(self): """Calculate Linsear Write.""" - return LinsearWrite(self._statistics, self._min_words).score() + return LinsearWrite(self._statistics, self._min_words).results() def smog(self,all_sentences=False, ignore_length=False): """SMOG Index. `all_sentences` indicates whether SMOG should use a sample of 30 sentences, as described in the original paper, or if it should use all sentences in the text""" return Smog(self._statistics, self._analyzer.sentences, - all_sentences=all_sentences, ignore_length=ignore_length).score() + all_sentences=all_sentences).results() def spache(self): """Spache Index.""" - return Spache(self._statistics, self._min_words).score() + return Spache(self._statistics, self._min_words).results() def statistics(self): return { diff --git a/readability/scorers/ari.py b/readability/scorers/ari.py index 0c2baa8..a6004eb 100644 --- a/readability/scorers/ari.py +++ b/readability/scorers/ari.py @@ -1,39 +1,21 @@ import math -from readability.exceptions import ReadabilityException +from readability.scorers.base_scorer import ReadabilityScorer -class Result: - def __init__(self, score, grade_levels, ages): - self.score = score - self.grade_levels = grade_levels - self.ages = ages - def __str__(self): - return "score: {}, grade_levels: {}, ages: {}". \ - format(self.score, self.grade_levels, self.ages) - - -class ARI: +class ARI(ReadabilityScorer): def __init__(self, stats, min_words=100): - self._stats = stats - if stats.num_words < min_words: - raise ReadabilityException('{} words required.'.format(min_words)) - - def score(self): - score = self._score() - return Result( - score=score, - grade_levels=self._grade_levels(score), - ages=self._ages(score)) + super().__init__(stats, min_words) + self.scorer_name = "ARI" - def _score(self): + def _raw_score(self): s = self._stats letters_per_word = s.num_letters / s.num_words words_per_sent = s.num_words / s.num_sentences return 4.71 * letters_per_word + 0.5 * words_per_sent - 21.43 - def _grade_levels(self, score): - score = math.ceil(score) + def _grade_level(self): + score = math.ceil(self._score) if score <= 1: return ['K'] elif score <= 2: @@ -63,8 +45,8 @@ def _grade_levels(self, score): else: return ['college_graduate'] - def _ages(self, score): - score = math.ceil(score) + def _age(self): + score = math.ceil(self._score) if score <= 1: return [5, 6] elif score <= 2: diff --git a/readability/scorers/base_scorer.py b/readability/scorers/base_scorer.py new file mode 100644 index 0000000..966c952 --- /dev/null +++ b/readability/scorers/base_scorer.py @@ -0,0 +1,78 @@ +from readability.exceptions import ReadabilityException +from readability.text import AnalyzerStatistics + + +class Result: + def __init__(self, scorer_name, score, grade_level, age, scale_value, cloze_score, description): + self.name = scorer_name + + self.score = score + self.grade_level = grade_level + self.age = age + self.scale_value = scale_value + self.cloze_score = cloze_score + self.description = description + + def __str__(self): + if self.name is not None: + result = "{}: ".format(self.name) + else: + result = "" + + if self.score is not None: + if isinstance(self.score, float): + result += "score = {:.2f}, ".format(self.score) + else: + result += "score = {}, ".format(self.score) + + if self.grade_level is not None: + result += "grade_level = {}, ".format(self.grade_level) + if self.age is not None: + result += "age = {}, ".format(self.age) + if self.scale_value is not None: + result += "scale_value = {:.2f}, ".format(self.scale_value) + if self.cloze_score is not None: + result += "cloze_score = {:.2f}, ".format(self.cloze_score) + if self.description is not None: + result += "{}, ".format(self.description) + + return result[:-2] + + +class ReadabilityScorer: + def __init__(self, stats: AnalyzerStatistics, min_words=100): + if stats.num_words < min_words: + raise ReadabilityException('{} words required.'.format(min_words)) + + self._stats = stats + self.scorer_name = None + + def results(self): + self._score = self._raw_score() + return Result( + scorer_name=self.scorer_name, + score=self._score, + grade_level=self._grade_level(), + age=self._age(), + scale_value=self._scale_value(), + cloze_score=self._cloze_score(), + description=self._description(), + ) + + def _raw_score(self): + return None + + def _grade_level(self): + return None + + def _age(self): + return None + + def _scale_value(self): + return None + + def _cloze_score(self): + return None + + def _description(self): + return None diff --git a/readability/scorers/coleman_liau.py b/readability/scorers/coleman_liau.py index 8283894..cf82a56 100644 --- a/readability/scorers/coleman_liau.py +++ b/readability/scorers/coleman_liau.py @@ -1,30 +1,14 @@ -from readability.exceptions import ReadabilityException +from readability.scorers.base_scorer import ReadabilityScorer -class Result: - def __init__(self, score, grade_level): - self.score = score - self.grade_level = grade_level - def __str__(self): - return "score: {}, grade_level: '{}'". \ - format(self.score, self.grade_level) - -class ColemanLiau: +class ColemanLiau(ReadabilityScorer): def __init__(self, stats, min_words=100): - self._stats = stats - if stats.num_words < min_words: - raise ReadabilityException('{} words required.'.format(min_words)) - - def score(self): - score = self._score() - return Result( - score=score, - grade_level=self._grade_level(score) - ) + super().__init__(stats, min_words) + self.scorer_name = 'Coleman-Liau' - def _score(self): + def _raw_score(self): s = self._stats scalar = s.num_words / 100 letters_per_100_words = s.num_letters / scalar @@ -32,5 +16,5 @@ def _score(self): return 0.0588 * letters_per_100_words - \ 0.296 * sentences_per_100_words - 15.8 - def _grade_level(self, score): - return str(round(score)) + def _grade_level(self): + return str(round(self._score)) diff --git a/readability/scorers/dale_chall.py b/readability/scorers/dale_chall.py index 2ca9b15..f70809d 100644 --- a/readability/scorers/dale_chall.py +++ b/readability/scorers/dale_chall.py @@ -1,29 +1,12 @@ -from readability.exceptions import ReadabilityException +from readability.scorers.base_scorer import ReadabilityScorer -class Result: - def __init__(self, score, grade_levels): - self.score = score - self.grade_levels = grade_levels - - def __str__(self): - return "score: {}, grade_levels: {}". \ - format(self.score, self.grade_levels) - - -class DaleChall: +class DaleChall(ReadabilityScorer): def __init__(self, stats, min_words=100): - self._stats = stats - if stats.num_words < min_words: - raise ReadabilityException('{} words required.'.format(min_words)) - - def score(self): - score = self._score() - return Result( - score=score, - grade_levels=self._grade_levels(score)) + super().__init__(stats, min_words) + self.scorer_name = "Dale-Chall" - def _score(self): + def _raw_score(self): stats = self._stats words_per_sent = stats.num_words / stats.num_sentences percent_difficult_words = \ @@ -34,7 +17,8 @@ def _score(self): else raw_score return adjusted_score - def _grade_levels(self, score): + def _grade_level(self): + score = self._score if score <= 4.9: return ['1', '2', '3', '4'] elif score >= 5 and score < 6: diff --git a/readability/scorers/flesch.py b/readability/scorers/flesch.py index 7d35cc0..6a83ddc 100644 --- a/readability/scorers/flesch.py +++ b/readability/scorers/flesch.py @@ -1,37 +1,19 @@ -from readability.exceptions import ReadabilityException +from readability.scorers.base_scorer import ReadabilityScorer -class Result: - def __init__(self, score, grade_levels, ease): - self.score = score - self.ease = ease - self.grade_levels = grade_levels - - def __str__(self): - return "score: {}, ease: '{}', grade_levels: {}". \ - format(self.score, self.ease, self.grade_levels) - - -class Flesch: +class Flesch(ReadabilityScorer): def __init__(self, stats, min_words=100): - self._stats = stats - if stats.num_words < min_words: - raise ReadabilityException('{} words required.'.format(min_words)) - - def score(self): - score = self._score() - return Result( - score=score, - ease=self._ease(score), - grade_levels=self._grade_levels(score)) + super().__init__(stats, min_words) + self.scorer_name = "Flesch" - def _score(self): + def _raw_score(self): stats = self._stats words_per_sent = stats.num_words / stats.num_sentences syllables_per_word = stats.num_syllables / stats.num_words return 206.835 - (1.015 * words_per_sent) - (84.6 * syllables_per_word) - def _ease(self, score): + def _description(self): + score = self._score if score >= 90 and score <= 100: return 'very_easy' elif score >= 80 and score < 90: @@ -47,7 +29,8 @@ def _ease(self, score): else: return 'very_confusing' - def _grade_levels(self, score): + def _grade_level(self): + score = self._score if score >= 90 and score <= 100: return ['5'] elif score >= 80 and score < 90: diff --git a/readability/scorers/flesch_kincaid.py b/readability/scorers/flesch_kincaid.py index 67286c8..94edd7b 100644 --- a/readability/scorers/flesch_kincaid.py +++ b/readability/scorers/flesch_kincaid.py @@ -1,33 +1,15 @@ -from readability.exceptions import ReadabilityException +from readability.scorers.base_scorer import ReadabilityScorer -class Result: - def __init__(self, score, grade_level): - self.score = score - self.grade_level = grade_level - - def __str__(self): - return "score: {}, grade_level: '{}'". \ - format(self.score, self.grade_level) - - -class FleschKincaid: +class FleschKincaid(ReadabilityScorer): def __init__(self, stats, min_words=100): - self._stats = stats - if stats.num_words < min_words: - raise ReadabilityException('{} words required.'.format(min_words)) - - def score(self): - score = self._score() - return Result( - score=score, - grade_level=self._grade_level(score) - ) + super().__init__(stats, min_words) + self.scorer_name = "Flesh-Kincaid" - def _score(self): + def _raw_score(self): stats = self._stats return (0.38 * stats.avg_words_per_sentence + 11.8 * stats.avg_syllables_per_word) - 15.59 - def _grade_level(self, score): - return str(round(score)) + def _grade_level(self): + return str(round(self._score)) diff --git a/readability/scorers/gunning_fog.py b/readability/scorers/gunning_fog.py index b3109f1..8f8037e 100644 --- a/readability/scorers/gunning_fog.py +++ b/readability/scorers/gunning_fog.py @@ -1,42 +1,24 @@ -from readability.exceptions import ReadabilityException +from readability.scorers.base_scorer import ReadabilityScorer -class Result: - def __init__(self, score, grade_level): - self.score = score - self.grade_level = grade_level - - def __str__(self): - return "score: {}, grade_level: '{}'". \ - format(self.score, self.grade_level) - - -class GunningFog: +class GunningFog(ReadabilityScorer): def __init__(self, stats, min_words=100): - self._stats = stats - if stats.num_words < min_words: - raise ReadabilityException('{} words required.'.format(min_words)) - - def score(self): - score = self._score() - return Result( - score=score, - grade_level=self._grade_level(score) - ) + super().__init__(stats, min_words) + self.scorer_name = "Gunning Fog" - def _score(self): + def _raw_score(self): s = self._stats word_per_sent = s.num_words / s.num_sentences poly_syllables_per_word = s.num_gunning_complex / s.num_words return 0.4 * (word_per_sent + 100 * poly_syllables_per_word) - def _grade_level(self, score): - rounded = round(score) + def _grade_level(self): + rounded = round(self._score) if rounded < 6: return 'na' - elif rounded >= 6 and rounded <= 12: + elif 6 <= rounded <= 12: return str(rounded) - elif rounded >= 13 and rounded <= 16: + elif 13 <= rounded <= 16: return 'college' else: return 'college_graduate' diff --git a/readability/scorers/linsear_write.py b/readability/scorers/linsear_write.py index 98da289..0dc46cf 100644 --- a/readability/scorers/linsear_write.py +++ b/readability/scorers/linsear_write.py @@ -1,30 +1,12 @@ -from readability.exceptions import ReadabilityException +from readability.scorers.base_scorer import ReadabilityScorer -class Result: - def __init__(self, score, grade_level): - self.score = score - self.grade_level = grade_level - - def __str__(self): - return "score: {}, grade_level: '{}'". \ - format(self.score, self.grade_level) - - -class LinsearWrite: +class LinsearWrite(ReadabilityScorer): def __init__(self, stats, min_words=100): - self._stats = stats - if stats.num_words < min_words: - raise ReadabilityException('{} words required.'.format(min_words)) - - def score(self): - score = self._score() - return Result( - score=score, - grade_level=self._grade_level(score) - ) + super().__init__(stats, min_words) + self.scorer_name = "Linsear Write" - def _score(self): + def _raw_score(self): s = self._stats num_easy_words = s.num_words - s.num_poly_syllable_words num_hard_words = s.num_poly_syllable_words @@ -33,5 +15,5 @@ def _score(self): return inter_score / 2 return (inter_score - 2) / 2 - def _grade_level(self, score): - return str(round(score)) + def _grade_level(self): + return str(round(self._score)) diff --git a/readability/scorers/smog.py b/readability/scorers/smog.py index ae73fd4..2e662a2 100644 --- a/readability/scorers/smog.py +++ b/readability/scorers/smog.py @@ -1,51 +1,30 @@ import math -from readability.text.analyzer import Analyzer -from readability.exceptions import ReadabilityException import warnings - -class Result: - def __init__(self, score, grade_level): - self.score = score - self.grade_level = grade_level - - def __str__(self): - return "score: {}, grade_level: {}". \ - format(self.score, self.grade_level) +from readability.text.analyzer import Analyzer +from readability.scorers.base_scorer import ReadabilityScorer -class Smog: - def __init__(self, stats, sentences, all_sentences=False, ignore_length=False): +class Smog(ReadabilityScorer): + def __init__(self, stats, sentences, all_sentences=False): """ Computes the SMOG readability score (Harry McLaughlin, 1969 https://ogg.osu.edu/media/documents/health_lit/WRRSMOG_Readability_Formula_G._Harry_McLaughlin__1969_.pdf) If all_sentences is false, computes the score as described in McLaughlin, 1969, using exactly 30 sentences If all_sentences is true, adjusts the score to use all sentences in the text """ if stats.num_sentences < 30: - if not ignore_length: - raise ReadabilityException( - 'SMOG requires 30 sentences. {} found' - .format(stats.num_sentences)) - else: - warnings.warn( - 'SMOG requires 30 sentences. {} found' - .format(stats.num_sentences)) + warnings.warn( + 'SMOG requires 30 sentences. {} found' + .format(stats.num_sentences)) + super().__init__(stats, 0) - self._stats = stats self.all_sentences = all_sentences if not self.all_sentences: self._smog_stats = self._smog_text_stats(sentences) - - def score(self): - score = self._score() - grade_level = self._grade_level(score) - return Result( - score=score, - grade_level=grade_level - ) + self.scorer_name = "SMOG" - def _score(self): + def _raw_score(self): if self.all_sentences: smog_stats = self._stats num_sentences = smog_stats.num_sentences @@ -56,8 +35,8 @@ def _score(self): num_complex_words = smog_stats.num_poly_syllable_words return 1.0430 * math.sqrt(30 * num_complex_words / num_sentences) + 3.1291 - def _grade_level(self, score): - return str(round(score)) + def _grade_level(self): + return str(round(self._score)) def _smog_text_stats(self, sentences): mid = int(math.floor(len(sentences) / 2)) diff --git a/readability/scorers/spache.py b/readability/scorers/spache.py index 18b8f78..d2a8169 100644 --- a/readability/scorers/spache.py +++ b/readability/scorers/spache.py @@ -1,29 +1,13 @@ -from readability.exceptions import ReadabilityException +from readability.scorers.base_scorer import ReadabilityScorer -class Result: - def __init__(self, score, grade_level): - self.score = score - self.grade_level = grade_level - def __str__(self): - return "score: {}, grade_level: '{}'". \ - format(self.score, self.grade_level) - - -class Spache: +class Spache(ReadabilityScorer): def __init__(self, stats, min_words=100): - self._stats = stats - if stats.num_words < min_words: - raise ReadabilityException('{} words required.'.format(min_words)) - - def score(self): - score = self._score() - return Result( - score=score, - grade_level=self._grade_level(score)) + super().__init__(stats, min_words) + self.scorer_name = "Spache" - def _score(self): + def _raw_score(self): stats = self._stats avg_sentence_len = stats.num_words / stats.num_sentences percent_difficult_words = \ @@ -31,5 +15,5 @@ def _score(self): return (0.141 * avg_sentence_len) + (0.086 * percent_difficult_words) + 0.839 - def _grade_level(self, score): - return str(round(score)) + def _grade_level(self): + return str(round(self._score)) diff --git a/readability/text/__init__.py b/readability/text/__init__.py index 263d485..516d0f4 100644 --- a/readability/text/__init__.py +++ b/readability/text/__init__.py @@ -1 +1,2 @@ from .analyzer import Analyzer +from .analyzer import AnalyzerStatistics diff --git a/test/test_readability.py b/test/test_readability.py index 46e0d1b..152e317 100644 --- a/test/test_readability.py +++ b/test/test_readability.py @@ -14,8 +14,8 @@ def test_ari(self): r = self.readability.ari() print(r) self.assertEqual(9.551245421245422, r.score) - self.assertEqual(['10'], r.grade_levels) - self.assertEqual([15, 16], r.ages) + self.assertEqual(['10'], r.grade_level) + self.assertEqual([15, 16], r.age) def test_coleman_liau(self): r = self.readability.coleman_liau() @@ -27,14 +27,14 @@ def test_dale_chall(self): r = self.readability.dale_chall() print(r) self.assertEqual(9.32399010989011, r.score) - self.assertEqual(['college'], r.grade_levels) + self.assertEqual(['college'], r.grade_level) def test_flesch(self): r = self.readability.flesch() print(r) self.assertEqual(51.039230769230784, r.score) - self.assertEqual(['10', '11', '12'], r.grade_levels) - self.assertEqual('fairly_difficult', r.ease) + self.assertEqual(['10', '11', '12'], r.grade_level) + self.assertEqual('fairly_difficult', r.description) def test_flesch_kincaid(self): r = self.readability.flesch_kincaid()