Skip to content

Commit

Permalink
Add enforce stop tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
hupe1980 committed Sep 30, 2023
1 parent e69dfe9 commit fa647f2
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 0 deletions.
23 changes: 23 additions & 0 deletions model/llm/llm.go
Original file line number Diff line number Diff line change
@@ -1,2 +1,25 @@
// Package llm provides functionalities for working with Large Language Models (LLMs).
package llm

import (
"regexp"
"strings"
)

// EnforceStopTokens returns text truncated immediately before the earliest
// occurrence of any of the given stop tokens. If no stop token occurs in
// text, or stop contains no non-empty tokens, text is returned unchanged.
//
// Stop tokens are matched literally and case-sensitively. Characters that
// are special in regular expressions (".", "(", "|", "?", ...) carry no
// special meaning, so arbitrary model stop sequences are safe to pass in.
// Empty stop tokens are ignored.
func EnforceStopTokens(text string, stop []string) string {
	// Escape every token so it is matched verbatim. Without escaping, a
	// token such as "." would match any character and a token such as "("
	// would make the pattern invalid and panic at compile time.
	quoted := make([]string, 0, len(stop))

	for _, token := range stop {
		// An empty token would match at every position; skip it.
		if token == "" {
			continue
		}

		quoted = append(quoted, regexp.QuoteMeta(token))
	}

	if len(quoted) == 0 {
		return text
	}

	// The pattern is an alternation of quoted literals, which is always a
	// valid expression, so MustCompile cannot panic here.
	re := regexp.MustCompile(strings.Join(quoted, "|"))

	// FindStringIndex reports the leftmost match; everything before it is kept.
	loc := re.FindStringIndex(text)
	if loc == nil {
		return text
	}

	return text[:loc[0]]
}
49 changes: 49 additions & 0 deletions model/llm/llm_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package llm

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestEnforceStopTokens runs EnforceStopTokens against a table of inputs and
// checks that each result equals the expected, possibly truncated, text.
func TestEnforceStopTokens(t *testing.T) {
	// testCase pairs an input text and stop list with the expected output.
	type testCase struct {
		name string
		text string
		stop []string
		want string
	}

	cases := []testCase{
		// None of the stop words occur, so the text is returned whole.
		{name: "NoStopWords", text: "This is a test sentence.", stop: []string{"stop1", "stop2"}, want: "This is a test sentence."},
		// "stop" does not match the capitalised "Stop"; the cut happens at "text".
		{name: "StopWordsPresent", text: "Stop the text here.", stop: []string{"stop", "text"}, want: "Stop the "},
		// Empty input yields empty output.
		{name: "EmptyText", text: "", stop: []string{"stop"}, want: ""},
		// An empty stop list leaves the text untouched.
		{name: "EmptyStopWords", text: "This is a test sentence.", stop: []string{}, want: "This is a test sentence."},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, EnforceStopTokens(tc.text, tc.stop), "unexpected result")
		})
	}
}

0 comments on commit fa647f2

Please sign in to comment.