-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
72 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,25 @@ | ||
// Package llm provides functionality for working with Large Language Models (LLMs). | ||
package llm | ||
|
||
import ( | ||
"regexp" | ||
"strings" | ||
) | ||
|
||
// EnforceStopTokens truncates text at the first occurrence of any stop word.
//
// Each entry of stop is matched as a literal, case-sensitive substring: the
// entries are escaped with regexp.QuoteMeta before being combined, so
// characters such as ".", "(" or "*" match themselves and can neither widen
// the match nor make the pattern fail to compile. Empty stop words are
// ignored.
//
// It returns the part of text that precedes the earliest match, or text
// unchanged when stop has no usable entries or none of them occurs in text.
func EnforceStopTokens(text string, stop []string) string {
	literals := make([]string, 0, len(stop))
	for _, word := range stop {
		// An empty stop word would match at every position and carries no
		// information, so it is skipped rather than allowed to truncate.
		if word == "" {
			continue
		}
		literals = append(literals, regexp.QuoteMeta(word))
	}
	if len(literals) == 0 {
		return text
	}

	// Every alternative has been quoted, so the joined pattern is a plain
	// alternation of literals and is always syntactically valid.
	re := regexp.MustCompile(strings.Join(literals, "|"))

	// Only the start of the leftmost match matters: everything before it is
	// kept, everything from it onwards is dropped.
	loc := re.FindStringIndex(text)
	if loc == nil {
		return text
	}
	return text[:loc[0]]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package llm | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestEnforceStopTokens(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
text string | ||
stop []string | ||
want string | ||
}{ | ||
{ | ||
name: "NoStopWords", | ||
text: "This is a test sentence.", | ||
stop: []string{"stop1", "stop2"}, | ||
want: "This is a test sentence.", | ||
}, | ||
{ | ||
name: "StopWordsPresent", | ||
text: "Stop the text here.", | ||
stop: []string{"stop", "text"}, | ||
want: "Stop the ", | ||
}, | ||
{ | ||
name: "EmptyText", | ||
text: "", | ||
stop: []string{"stop"}, | ||
want: "", | ||
}, | ||
{ | ||
name: "EmptyStopWords", | ||
text: "This is a test sentence.", | ||
stop: []string{}, | ||
want: "This is a test sentence.", | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
got := EnforceStopTokens(tt.text, tt.stop) | ||
|
||
assert.Equal(t, tt.want, got, "unexpected result") | ||
}) | ||
} | ||
} |