From 044ce52fe91c5b06c153c610576db0b3d27a704d Mon Sep 17 00:00:00 2001 From: Koichi Akabe Date: Thu, 13 Apr 2023 10:21:45 +0900 Subject: [PATCH] Update example in documentation (#9) --- docs/source/examples.rst | 17 +++++++++++++++-- src/lib.rs | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/source/examples.rst b/docs/source/examples.rst index e9cfc68..21aad6a 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -11,7 +11,7 @@ You can check the version number as shown below to use compatible models: >>> import vibrato >>> vibrato.VIBRATO_VERSION - '0.3.3' + '0.5.0' Examples: @@ -20,7 +20,7 @@ Examples: >>> import vibrato >>> with open('path/to/system.dic', 'rb') as fp: - >>> dict_data = fp.read() + ... dict_data = fp.read() >>> tokenizer = vibrato.Vibrato(dict_data) >>> tokens = tokenizer.tokenize('社長は火星猫だ') @@ -46,3 +46,16 @@ Examples: >>> tokens[0].end() 2 + +The distributed models are compressed in zstd format. If you want to load these compressed models, +you must decompress them outside the API: + +.. code-block:: python + + >>> import vibrato + >>> import zstandard # zstandard package in PyPI + + >>> dctx = zstandard.ZstdDecompressor() + >>> with open('path/to/system.dic.zst', 'rb') as fp: + ... dict_data = dctx.stream_reader(fp).read() + >>> tokenizer = vibrato.Vibrato(dict_data) diff --git a/src/lib.rs b/src/lib.rs index b957ca1..fbfd459 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -152,7 +152,7 @@ pub struct TokenizerWrapper { /// Examples: /// >>> import vibrato /// >>> with open('path/to/system.dic', 'rb') as fp: -/// >>> dict_data = fp.read() +/// ... dict_data = fp.read() /// >>> tokenizer = vibrato.Vibrato(dict_data) /// >>> tokens = tokenizer.tokenize('社長は火星猫だ') /// >>> len(tokens)