Skip to content

Commit

Permalink
compare output between test tokenizer and the builtin simple tokenizer
Browse files (browse the repository at this point in the history)
  • Loading branch information
hideaki-t committed Feb 15, 2024
1 parent 10d53f7 commit ee95e40
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion tests/test_base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -283,10 +283,25 @@ def test_tokenizer_output(c, tokenizer_module):
(t, expect[-1][2] + 1, expect[-1][2] + 1 + len(t.encode("utf-8")), i)
)
expect = expect[1:]
a = c.execute(
"SELECT token, start, end, position FROM tok1 WHERE input=?", [s]
).fetchall()
for a, e in zip(
c.execute(
"SELECT token, start, end, position " "FROM tok1 WHERE input=?", [s]
"SELECT token, start, end, position FROM tok1 WHERE input=?", [s]
),
expect,
):
assert e == a

c.execute("CREATE VIRTUAL TABLE tok2 USING fts3tokenize()")
s = '"binding" OR "あいうえお"'
for a, e in zip(
c.execute(
"SELECT token, start, end, position FROM tok1 WHERE input=?", [s]
),
c.execute(
"SELECT token, start, end, position FROM tok2 WHERE input=?", [s]
),
):
assert a == e

0 comments on commit ee95e40

Please sign in to comment.