diff --git a/README.md b/README.md index a2ca0a7..735bce3 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ![Travis Build](https://travis-ci.org/cdimascio/py-readability-metrics.svg?branch=master) ![Python](https://img.shields.io/badge/python-3.x-blue.svg) [![Documentation Status](https://readthedocs.org/projects/py-readability-metrics/badge/?version=latest)](https://py-readability-metrics.readthedocs.io/en/latest/?badge=latest) [![wheel](https://img.shields.io/badge/wheel-yes-ff00c9.svg)](https://pypi.org/project/py-readability-metrics/) [![](https://img.shields.io/gitter/room/cdimascio-oss/community?color=%23eb205a)](https://gitter.im/cdimascio-oss/community) [![All Contributors](https://img.shields.io/badge/all_contributors-1-orange.svg?style=flat-square)](#contributors-) [![MIT license](https://img.shields.io/badge/License-MIT-green.svg)](https://lbesson.mit-license.org/) -Score the _readability_ of text using popular readability formulas and metrics including: [Flesch Kincaid Grade Level](#flesch-kincaid-grade-level), [Flesch Reading Ease](#flesch-reading-ease), [Gunning Fog Index](#gunning-fog), [Dale Chall Readability](#dale-chall-readability), [Automated Readability Index (ARI)](#automated-readability-index-ari), [Coleman Liau Index](#coleman-liau-index), [Linsear Write](#linsear-write), [SMOG](#smog), and [SPACHE](#spache). 📗 +Score the _readability_ of text using popular readability formulas and metrics including: [Flesch Kincaid Grade Level](#flesch-kincaid-grade-level), [Flesch Reading Ease](#flesch-reading-ease), [Gunning Fog Index](#gunning-fog), [Dale Chall Readability](#dale-chall-readability), [Automated Readability Index (ARI)](#automated-readability-index-ari), [Coleman Liau Index](#coleman-liau-index), [Linsear Write](#linsear-write), [SMOG](#smog), [SPACHE](#spache) and [Lix](#lix). 
📗 [![GitHub stars](https://img.shields.io/github/stars/cdimascio/py-readability-metrics.svg?style=social&label=Star&maxAge=2592000)](https://GitHub.com/cdimascio/py-readability-metrics/stargazers/) [![Twitter URL](https://img.shields.io/twitter/url/https/github.com/cdimascio/py-readability-metrics.svg?style=social)](https://twitter.com/intent/tweet?text=Check%20out%20py-readability-metrics%20by%20%40CarmineDiMascio%20https%3A%2F%2Fgithub.com%2Fcdimascio%2Fpy-readability-metrics%20%F0%9F%91%8D) @@ -34,6 +34,7 @@ r.ari() r.linsear_write() r.smog() r.spache() +r.lix() ``` **\*Note:** `text` must contain >= 100 words\* @@ -49,6 +50,7 @@ r.spache() - [SMOG](#smog) - [Spache](#spache) - [Linsear Write](#linsear-write) +- [Lix](#lix) ## Readability Metric Details and Properties @@ -240,6 +242,24 @@ print(lw.score) print(lw.grade_level) ``` +### Lix + +Lix (abbreviation of Swedish läsbarhetsindex, "readability index") is a readability measure for Scandinavian and West European languages developed by Carl-Hugo Björnsson. It is defined as the sum of average sentence length and the percentage of words with more than six letters. + +**_call:_** + +```python +r.lix() +``` + +**_example:_** + +```python +s = r.lix() +print(s.score) +print(s.ease) +``` + ## [Contributing](CONTRIBUTING.md) Contributions are welcome! diff --git a/docs/source/.Rhistory b/docs/source/.Rhistory new file mode 100644 index 0000000..e69de29 diff --git a/docs/source/lix.rst b/docs/source/lix.rst new file mode 100644 index 0000000..e375adc --- /dev/null +++ b/docs/source/lix.rst @@ -0,0 +1,25 @@ +Läsbarhetsindex +=============== + +About +^^^^^ + +Readability index for Swedish and other European Languages. [reference]_ + +Usage +^^^^^ + +.. code-block:: python + + r = Readability(text) + + f = r.lix() + + print(f.score) + print(f.ease) + + +References +---------- + +.. 
[reference] `Lix (readability test) <https://en.wikipedia.org/wiki/Lix_(readability_test)>`_ diff --git a/readability/readability.py b/readability/readability.py index 3a48d42..790ae65 100644 --- a/readability/readability.py +++ b/readability/readability.py @@ -1,7 +1,6 @@ from .text import Analyzer from .scorers import ARI, ColemanLiau, DaleChall, Flesch, \ - FleschKincaid, GunningFog, LinsearWrite, Smog, Spache -import warnings + FleschKincaid, GunningFog, LinsearWrite, Smog, Spache, Lix class Readability: def __init__(self, text, min_words=100): @@ -50,11 +49,16 @@ def smog(self,all_sentences=False, ignore_length=False): def spache(self): """Spache Index.""" return Spache(self._statistics, self._min_words).score() + + def lix(self): + """Läsbarhetsindex.""" + return Lix(self._statistics, self._min_words).score() def statistics(self): return { 'num_letters': self._statistics.num_letters, 'num_words': self._statistics.num_words, + 'num_long_words': self._statistics.num_long_words, 'num_sentences': self._statistics.num_sentences, 'num_polysyllabic_words': self._statistics.num_poly_syllable_words, 'avg_words_per_sentence': self._statistics.avg_words_per_sentence, diff --git a/readability/scorers/__init__.py b/readability/scorers/__init__.py index df708e8..a18c8a1 100644 --- a/readability/scorers/__init__.py +++ b/readability/scorers/__init__.py @@ -1,5 +1,6 @@ from .flesch import Flesch +from .lix import Lix from .flesch_kincaid import FleschKincaid from .gunning_fog import GunningFog from .coleman_liau import ColemanLiau diff --git a/readability/scorers/lix.py b/readability/scorers/lix.py new file mode 100644 index 0000000..b8e981c --- /dev/null +++ b/readability/scorers/lix.py @@ -0,0 +1,42 @@ +from readability.exceptions import ReadabilityException + + +class Result: + def __init__(self, score, ease): + self.score = score + self.ease = ease + + def __str__(self): + return "score: {}, ease: '{}'". 
\ + format(self.score, self.ease) + + +class Lix: + def __init__(self, stats, min_words=100): + self._stats = stats + if stats.num_words < min_words: + raise ReadabilityException('{} words required.'.format(min_words)) + + def score(self): + score = self._score() + return Result( + score=score, + ease=self._ease(score)) + + def _score(self): + stats = self._stats + words_per_sent = stats.num_words / stats.num_sentences + percentage_long_words = stats.num_long_words / stats.num_words * 100 + return words_per_sent + percentage_long_words + + def _ease(self, score): + if score > 60: + return 'very_difficult' + elif score > 50: + return 'difficult' + elif score > 40: + return 'medium difficulty' + elif score > 30: + return 'easy reading' + else: + return 'very easy' diff --git a/readability/text/analyzer.py b/readability/text/analyzer.py index dce409e..c989864 100644 --- a/readability/text/analyzer.py +++ b/readability/text/analyzer.py @@ -25,6 +25,10 @@ def num_letters(self): def num_words(self): return self.stats['num_words'] + @property + def num_long_words(self): + return self.stats['num_long_words'] + @property def num_sentences(self): return self.stats['num_sentences'] @@ -71,6 +75,7 @@ def _statistics(self, text): syllable_count = 0 poly_syllable_count = 0 word_count = 0 + long_word_count = 0 letters_count = 0 gunning_complex_count = 0 dale_chall_complex_count = 0 @@ -92,11 +97,13 @@ def is_spache_complex(t): for t in tokens: if not self._is_punctuation(t): word_count += 1 word_syllable_count = count_syllables(t) syllable_count += word_syllable_count letters_count += len(t) + word_num_letters = len(t) + long_word_count += 1 if word_num_letters > 6 else 0 poly_syllable_count += 1 if word_syllable_count >= 3 else 0 gunning_complex_count += \ 1 if is_gunning_complex(t, word_syllable_count) \ @@ -113,6 +120,7 @@ def is_spache_complex(t): 'num_syllables': syllable_count, 'num_poly_syllable_words': 
poly_syllable_count, 'num_words': word_count, + 'num_long_words': long_word_count, 'num_sentences': sentence_count, 'num_letters': letters_count, 'num_gunning_complex': gunning_complex_count, diff --git a/test/test_readability.py b/test/test_readability.py index 46e0d1b..aa6ae18 100644 --- a/test/test_readability.py +++ b/test/test_readability.py @@ -36,6 +36,16 @@ def test_flesch(self): self.assertEqual(['10', '11', '12'], r.grade_levels) self.assertEqual('fairly_difficult', r.ease) + def test_lix(self): + text = """Läsbarhetsindex (LIX) kan användas för att få uppfattning om hur lätt eller svår en text är att läsa. LIX är baserat på medeltalet ord per mening och andelen långa ord (ord med fler än 6 bokstäver) uttryckt i procent. Det finns flera olika läsbarhetsindex, men i Sverige är LIX det mest använda. LIX utvecklades på 1960-talet av pedagogikforskaren Carl-Hugo Björnsson. + Läsbarhetsindex (LIX) kan användas för att få uppfattning om hur lätt eller svår en text är att läsa. LIX är baserat på medeltalet ord per mening och andelen långa ord (ord med fler än 6 bokstäver) uttryckt i procent. Det finns flera olika läsbarhetsindex, men i Sverige är LIX det mest använda. LIX utvecklades på 1960-talet av pedagogikforskaren Carl-Hugo Björnsson. + """ + readability = Readability(text) + r = readability.lix() + print(r) + self.assertAlmostEqual(41.47950819672131, r.score) + self.assertEqual('medium difficulty', r.ease) + def test_flesch_kincaid(self): r = self.readability.flesch_kincaid() print(r)