diff --git a/CHANGELOG.md b/CHANGELOG.md index 15f68a5e..d6ed560a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](http://semver.org/). +## 2.3.0 - 2024-12-17 + +### Features + +- Release new multilabel biomedBERT model trained on LLM (Gemini) synthetically generated NER data. The model was trained on over 7000 LLM annotated documents with a total of 295822 samples. + The model was trained for 21 epochs and achieved an F1 score of 95.6% on a held out test set. (multilabel_bert) +- added multilabel NER training example and config. +- added scaling kazu with Ray docs and example. + +### Bugfixes + +- Fix issue with TransformersModelForTokenClassificationNerStep when processing large amounts of documents. The fix offloads tensors onto cpu before performing the torch.cat operation which led to a zero tensor before. (pytorch_memory_issue) + + ## 2.2.1 - 2024-10-21 ### Features diff --git a/docs/_changelog.d/+scaling.feature.rst b/docs/_changelog.d/+scaling.feature.rst deleted file mode 100644 index 20602a00..00000000 --- a/docs/_changelog.d/+scaling.feature.rst +++ /dev/null @@ -1 +0,0 @@ -added scaling kazu with Ray docs and example. diff --git a/docs/_changelog.d/+training.feature.rst b/docs/_changelog.d/+training.feature.rst deleted file mode 100644 index e8862b2e..00000000 --- a/docs/_changelog.d/+training.feature.rst +++ /dev/null @@ -1 +0,0 @@ -added multilabel NER training example and config. diff --git a/docs/_changelog.d/multilabel_bert.feature.rst b/docs/_changelog.d/multilabel_bert.feature.rst deleted file mode 100644 index f471e1cc..00000000 --- a/docs/_changelog.d/multilabel_bert.feature.rst +++ /dev/null @@ -1,2 +0,0 @@ -Release new multilabel biomedBERT model trained on LLM (Gemini) synthetically generated NER data. The model was trained on over 7000 LLM annoted documents with a total of 295822 samples. 
-The model was trained for 21 epochs and achieved an F1 score of 95.6% on a held out test set. diff --git a/docs/_changelog.d/pytorch_memory_issue.bugfix.rst b/docs/_changelog.d/pytorch_memory_issue.bugfix.rst deleted file mode 100644 index b47fec63..00000000 --- a/docs/_changelog.d/pytorch_memory_issue.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fix issue with TransformersModelForTokenClassificationNerStep when processing large amounts of documents. The fix offloads tensors onto cpu before performin the torch.cat operation which lead to a zero tensor before. diff --git a/kazu/__init__.py b/kazu/__init__.py index b19ee4b7..55e47090 100644 --- a/kazu/__init__.py +++ b/kazu/__init__.py @@ -1 +1 @@ -__version__ = "2.2.1" +__version__ = "2.3.0"