From 472523ec0386fcfd1e71fbe076dfbfda041c5d22 Mon Sep 17 00:00:00 2001 From: Kazu Version Bump Date: Tue, 15 Aug 2023 20:49:46 +0000 Subject: [PATCH] =?UTF-8?q?Bump=20version:=201.0.2=20=E2=86=92=201.0.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 31 +++++++++++++++++++ docs/_changelog.d/+deadcode.removal.rst | 2 -- docs/_changelog.d/+docformatter.doc.rst | 2 -- .../+improvespacytokenization.feature.rst | 2 -- docs/_changelog.d/+jsontypehint.removal.rst | 2 -- .../+spanfinderreturn.feature.rst | 1 - docs/_changelog.d/+strictmypy.feature.rst | 1 - docs/_changelog.d/+tfidfthashing.bugfix.rst | 2 -- kazu/__init__.py | 2 +- 9 files changed, 32 insertions(+), 13 deletions(-) delete mode 100644 docs/_changelog.d/+deadcode.removal.rst delete mode 100644 docs/_changelog.d/+docformatter.doc.rst delete mode 100644 docs/_changelog.d/+improvespacytokenization.feature.rst delete mode 100644 docs/_changelog.d/+jsontypehint.removal.rst delete mode 100644 docs/_changelog.d/+spanfinderreturn.feature.rst delete mode 100644 docs/_changelog.d/+strictmypy.feature.rst delete mode 100644 docs/_changelog.d/+tfidfthashing.bugfix.rst diff --git a/CHANGELOG.md b/CHANGELOG.md index dfb14f463..e0a38e438 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,37 @@ and this project adheres to [Semantic Versioning](http://semver.org/). +## 1.0.3 - 2023-08-15 + + +### Features + +- Improved spacy tokenization for the ExplosionStringMatchingStep. + Previously, this caused us to miss entities that ended with a single-letter uppercase token (like 'Haemophilia A') if they were at the end of a sentence. +- Make SpanFinder return found spans directly, rather than having to access `.closed_spans` after calling, which is easier. Note that `.closed_spans` remains, so this is backwards-compatible. +- Turned on 'strict' mypy checking (with some exceptions as to the exact flags used), and fixed issues that this raised. 
+ + +### Bugfixes + +- Fix incorrect caching behaviour of Index TfidfVectorizer builds. + This meant they got rebuilt every time, which meant in turn that the cache and therefore the model pack size grew after use. + + +### Improved Documentation + +- Started using docformatter to automatically format docstrings, and tweaked minor issues this brought up. + This will help us comply with PEP257 and be consistent across the codebase. + + +### Deprecations and Removals + +- Removed various pieces of dead code. + These are very unlikely to have been used by end users, so not deprecating/doing a major version bump. +- Rename Type Alias JsonDictType to JsonEncodable - which is a more straightforward/correct description of what it actually means. + This was used internally to Kazu rather than being expected to be used by end users, so no deprecation/major version bump. + + ## 1.0.2 - 2023-08-07 diff --git a/docs/_changelog.d/+deadcode.removal.rst b/docs/_changelog.d/+deadcode.removal.rst deleted file mode 100644 index 9b98a06c0..000000000 --- a/docs/_changelog.d/+deadcode.removal.rst +++ /dev/null @@ -1,2 +0,0 @@ -Removed various pieces of dead code. -These are very unlikely to have been used by end users, so not deprecating/doing a major version bump. diff --git a/docs/_changelog.d/+docformatter.doc.rst b/docs/_changelog.d/+docformatter.doc.rst deleted file mode 100644 index 981ac579c..000000000 --- a/docs/_changelog.d/+docformatter.doc.rst +++ /dev/null @@ -1,2 +0,0 @@ -Started using docformatter to automatically format docstrings, and tweak minor issues this brought up. -This will help us comply with PEP257 and be consistent across the codebase. diff --git a/docs/_changelog.d/+improvespacytokenization.feature.rst b/docs/_changelog.d/+improvespacytokenization.feature.rst deleted file mode 100644 index 82f3c155a..000000000 --- a/docs/_changelog.d/+improvespacytokenization.feature.rst +++ /dev/null @@ -1,2 +0,0 @@ -Improved spacy tokenization for the ExplosionStringMatchingStep. 
-Previously, this caused us to miss entities that ended with a single-letter uppercase token at the end (like 'Haemophilia A') if it was at the end of a sentence. diff --git a/docs/_changelog.d/+jsontypehint.removal.rst b/docs/_changelog.d/+jsontypehint.removal.rst deleted file mode 100644 index 624642df5..000000000 --- a/docs/_changelog.d/+jsontypehint.removal.rst +++ /dev/null @@ -1,2 +0,0 @@ -Rename Type Alias JsonDictType to JsonEncodable - which is more straightforward/correct what it actually means. -This was used internally to Kazu rather than being expected to be used by end users, so no deprecation/major version bump. diff --git a/docs/_changelog.d/+spanfinderreturn.feature.rst b/docs/_changelog.d/+spanfinderreturn.feature.rst deleted file mode 100644 index 2181f1f78..000000000 --- a/docs/_changelog.d/+spanfinderreturn.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Make SpanFinder return found spans directly, rather than having to access `.closed_spans` after calling, which is easier. Note that `.closed_spans` remains, so this is backwards-compatible. diff --git a/docs/_changelog.d/+strictmypy.feature.rst b/docs/_changelog.d/+strictmypy.feature.rst deleted file mode 100644 index 075cc4231..000000000 --- a/docs/_changelog.d/+strictmypy.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Turned on 'strict' mypy checking (with some exceptions as to the exact flags used), and fixed issues that this raised. diff --git a/docs/_changelog.d/+tfidfthashing.bugfix.rst b/docs/_changelog.d/+tfidfthashing.bugfix.rst deleted file mode 100644 index bb1194168..000000000 --- a/docs/_changelog.d/+tfidfthashing.bugfix.rst +++ /dev/null @@ -1,2 +0,0 @@ -Fix incorrect caching behaviour of Index TfidfVectorizer builds. -This meant they got rebuilt every time, which meant in turn that the cache and therefore the model pack size grew after use. 
diff --git a/kazu/__init__.py b/kazu/__init__.py index 7863915fa..976498ab9 100644 --- a/kazu/__init__.py +++ b/kazu/__init__.py @@ -1 +1 @@ -__version__ = "1.0.2" +__version__ = "1.0.3"