-
-
Notifications
You must be signed in to change notification settings
Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat!(jieba): upgrade to [email protected]
Also provide customizable API for Jieba and TfIdf
- Loading branch information
1 parent ec14da0, commit 1fb5877
Showing 49 changed files with 619,831 additions and 913 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
```diff
@@ -1,39 +1,43 @@
 import test from 'ava'
 
-import { cut, tag, extract, loadTFIDFDict, loadDict } from '../index'
+import { Jieba, TfIdf } from '../index.js'
+import { dict, idf } from '../dict.js'
 
 const sentence = '我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,走上人生巅峰。'
 
+const jieba = Jieba.withDict(dict)
+const tfIdf = TfIdf.withDict(idf)
+
 test('cut result should be equal to nodejieba', (t) => {
-  t.snapshot(cut(sentence))
+  t.snapshot(jieba.cut(sentence))
 })
 
 test('tag result shoule be equal to nodejieba', (t) => {
-  t.snapshot(tag(sentence))
+  t.snapshot(jieba.tag(sentence))
 })
 
 test('extract should be equal to nodejieba', (t) => {
   const sentence =
     '今天纽约的天气真好啊,京华大酒店的张尧经理吃了一只北京烤鸭。后天纽约的天气不好,昨天纽约的天气也不好,北京烤鸭真好吃'
   const topn = 3
   t.snapshot(
-    extract(sentence, topn).map((t) => ({
+    tfIdf.extractKeywords(jieba, sentence, topn).map((t) => ({
       keyword: t.keyword,
       weight: typeof t.weight,
     })),
   )
 })
 
-test.skip('should be able to load custom TFID dict', (t) => {
+test('should be able to load custom TFID dict', (t) => {
   const userdict = Buffer.from('专业 20.19')
-  loadTFIDFDict(userdict)
+  const tfIdf = TfIdf.withDict(userdict)
   const fixture = '我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。'
-  t.snapshot(extract(fixture, 3))
+  t.snapshot(tfIdf.extractKeywords(jieba, fixture, 3))
 })
 
-test.skip('should be able to load custom dict', (t) => {
+test('should be able to load custom dict', (t) => {
   const userdict = Buffer.from('出了 10000')
-  loadDict(userdict)
+  const jieba = Jieba.withDict(userdict)
   const fixture = '我们中出了一个叛徒'
-  t.notThrows(() => cut(fixture))
+  t.notThrows(() => jieba.cut(fixture))
 })
```
Oops, something went wrong.