From 275e16e1a647b196592a7118a731b2a5dedf56a7 Mon Sep 17 00:00:00 2001
From: 0xZensh
Date: Fri, 1 Sep 2023 14:15:14 +0800
Subject: [PATCH] Improve tokens estimating

---
 src/content/content.go      | 21 +++++++++----
 src/content/content_test.go | 59 +++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/src/content/content.go b/src/content/content.go
index 900a78a..d667ff2 100644
--- a/src/content/content.go
+++ b/src/content/content.go
@@ -1,8 +1,10 @@
 package content
 
 import (
+	"bytes"
 	"encoding/json"
 	"errors"
+	"strconv"
 
 	"github.com/fxamacker/cbor/v2"
 	"github.com/jaevor/go-nanoid"
@@ -187,6 +189,8 @@ func (a *DocumentNodeAmender) AmendNode(node *DocumentNode) {
 	}
 }
 
+// EstimateTranslatingString builds a newline-delimited JSON string from content for estimating translation tokens.
+// It is not the actual string used for translating; it only approximates it for token counting.
 func EstimateTranslatingString(content *util.Bytes) (string, error) {
 	if content == nil {
 		return "", gear.ErrInternalServerError.WithMsg("empty content")
@@ -197,13 +201,18 @@ func EstimateTranslatingString(content *util.Bytes) (string, error) {
 		return "", gear.ErrInternalServerError.From(err)
 	}
 	contents := doc.ToTEContents()
+	var buf bytes.Buffer
+	en := json.NewEncoder(&buf) // each Encode call appends one JSON value plus a newline
 	for i := range contents {
-		contents[i].ID = ""
+		if texts := contents[i].Texts; len(texts) > 0 { // entries without texts are skipped
+			if err := en.Encode([]string{strconv.Itoa(i)}); err != nil { // the entry index, on its own line
+				return "", gear.ErrInternalServerError.From(err)
+			}
+			if err := en.Encode(texts); err != nil {
+				return "", gear.ErrInternalServerError.From(err)
+			}
+		}
 	}
-	teTokens, err := json.Marshal(contents)
-	if err != nil {
-		return "", gear.ErrInternalServerError.From(err)
-	}
 
-	return string(teTokens), nil
+	return buf.String(), nil
 }
diff --git a/src/content/content_test.go b/src/content/content_test.go
index 02b2a5d..5417daa 100644
--- a/src/content/content_test.go
+++ b/src/content/content_test.go
@@ -146,3 +146,62 @@ func TestDocumentNodeAmender(t *testing.T) {
fmt.Println(string(data)) assert.Nil(err) } + +func TestEstimateTranslatingString(t *testing.T) { + assert := assert.New(t) + obj := DocumentNode{ + Type: "doc", + Content: []DocumentNode{ + { + Type: "heading", + Attrs: map[string]AttrValue{ + "id": String("abcdef"), + "level": Int64(1), + }, + Content: []DocumentNode{ + { + Type: "text", + Text: util.Ptr("Hello"), + }, + }, + }, + { + Type: "heading", + Attrs: map[string]AttrValue{ + "id": String("123456"), + "level": Int64(1), + }, + Content: []DocumentNode{ + { + Type: "text", + Text: util.Ptr("world"), + }, + }, + }, + { + Type: "paragraph", + Content: []DocumentNode{ + { + Type: "text", + Text: util.Ptr("some text"), + }, + }, + }, + }, + } + + te := obj.ToTEContents() + require.Equal(t, 4, len(te)) + assert.Equal("abcdef", te[0].ID) + assert.Equal("------", te[1].ID) + assert.Equal("123456", te[2].ID) + assert.Equal("------", te[3].ID) + + data, err := cbor.Marshal(&obj) + require.Nil(t, err) + + str, err := EstimateTranslatingString(util.Ptr(util.Bytes(data))) + require.Nil(t, err) + fmt.Println(str) + assert.Equal("[\"0\"]\n[\"Hello\"]\n[\"2\"]\n[\"world\"]\n[\"4\"]\n[\"some text\"]\n", str) +}