Skip to content

Commit

Permalink
Improve tokens estimating
Browse files Browse the repository at this point in the history
  • Loading branch information
zensh committed Sep 1, 2023
1 parent 4d450ed commit 275e16e
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 6 deletions.
21 changes: 15 additions & 6 deletions src/content/content.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package content

import (
"bytes"
"encoding/json"
"errors"
"strconv"

"github.com/fxamacker/cbor/v2"
"github.com/jaevor/go-nanoid"
Expand Down Expand Up @@ -187,6 +189,8 @@ func (a *DocumentNodeAmender) AmendNode(node *DocumentNode) {
}
}

// EstimateTranslatingString builds a string from content for estimating translation cost.
// The result is not the actual string to be translated; it is only used to estimate the number of tokens to translate.
func EstimateTranslatingString(content *util.Bytes) (string, error) {
if content == nil {
return "", gear.ErrInternalServerError.WithMsg("empty content")
Expand All @@ -197,13 +201,18 @@ func EstimateTranslatingString(content *util.Bytes) (string, error) {
return "", gear.ErrInternalServerError.From(err)
}
contents := doc.ToTEContents()
buf := bytes.Buffer{}
en := json.NewEncoder(&buf)
for i := range contents {
contents[i].ID = ""
if texts := contents[i].Texts; len(texts) > 0 {
if err := en.Encode([]string{strconv.Itoa(i)}); err != nil {
return "", gear.ErrInternalServerError.From(err)
}
if err := en.Encode(texts); err != nil {
return "", gear.ErrInternalServerError.From(err)
}
}
}

teTokens, err := json.Marshal(contents)
if err != nil {
return "", gear.ErrInternalServerError.From(err)
}
return string(teTokens), nil
return string(buf.String()), nil
}
59 changes: 59 additions & 0 deletions src/content/content_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,62 @@ func TestDocumentNodeAmender(t *testing.T) {
fmt.Println(string(data))
assert.Nil(err)
}

// TestEstimateTranslatingString builds a document with two headings and one
// paragraph, checks the IDs reported by ToTEContents, then CBOR-encodes the
// document and verifies the exact string returned by EstimateTranslatingString.
func TestEstimateTranslatingString(t *testing.T) {
	assert := assert.New(t)

	// textOnly returns the children of a node that holds a single text run.
	textOnly := func(s string) []DocumentNode {
		return []DocumentNode{
			{Type: "text", Text: util.Ptr(s)},
		}
	}

	// heading returns a level-1 heading node with the given id and text.
	heading := func(id, text string) DocumentNode {
		return DocumentNode{
			Type: "heading",
			Attrs: map[string]AttrValue{
				"id":    String(id),
				"level": Int64(1),
			},
			Content: textOnly(text),
		}
	}

	doc := DocumentNode{
		Type: "doc",
		Content: []DocumentNode{
			heading("abcdef", "Hello"),
			heading("123456", "world"),
			{Type: "paragraph", Content: textOnly("some text")},
		},
	}

	// The ID checks run before encoding, in the same order as the original test.
	contents := doc.ToTEContents()
	wantIDs := []string{"abcdef", "------", "123456", "------"}
	require.Equal(t, 4, len(contents))
	for i, id := range wantIDs {
		assert.Equal(id, contents[i].ID)
	}

	encoded, err := cbor.Marshal(&doc)
	require.Nil(t, err)

	got, err := EstimateTranslatingString(util.Ptr(util.Bytes(encoded)))
	require.Nil(t, err)
	fmt.Println(got)

	// One JSON array per line: an index line followed by that entry's texts.
	want := "[\"0\"]\n[\"Hello\"]\n[\"2\"]\n[\"world\"]\n[\"4\"]\n[\"some text\"]\n"
	assert.Equal(want, got)
}

0 comments on commit 275e16e

Please sign in to comment.