Skip to content

Commit

Permalink
feat: auto ban error rate and auto test unban
Browse files Browse the repository at this point in the history
  • Loading branch information
zijiren233 committed Dec 20, 2024
1 parent ecc44b6 commit 674a6ca
Show file tree
Hide file tree
Showing 7 changed files with 202 additions and 119 deletions.
27 changes: 9 additions & 18 deletions service/aiproxy/common/config/config.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package config

import (
"math"
"os"
"slices"
"strconv"
Expand Down Expand Up @@ -31,10 +32,8 @@ var (
var (
// Number of retries.
retryTimes atomic.Int64
// Upper bound on the number of failures a model may accumulate while still being retried.
modelFailDisableTimes atomic.Int64
// Model disable duration.
modelFailDisableTime atomic.Int64
// Error rate at which a model is automatically banned.
// Held as the uint64 bit pattern produced by math.Float64bits (initially the
// bits of 0.5) so that the accessors can read and write it with
// atomic.LoadUint64 / atomic.StoreUint64 and convert via math.Float64frombits.
modelErrorAutoBanRate = math.Float64bits(0.5)
// Timeout per model type, in seconds.
timeoutWithModelType atomic.Value
)
Expand All @@ -43,24 +42,16 @@ func GetRetryTimes() int64 {
return retryTimes.Load()
}

// SetRetryTimes atomically stores times as the retry count that
// GetRetryTimes returns.
func SetRetryTimes(times int64) {
retryTimes.Store(times)
}

func GetModelFailDisableTimes() int64 {
return modelFailDisableTimes.Load()
func GetModelErrorAutoBanRate() float64 {
return math.Float64frombits(atomic.LoadUint64(&modelErrorAutoBanRate))
}

func SetModelFailDisableTimes(times int64) {
modelFailDisableTimes.Store(times)
func SetModelErrorAutoBanRate(rate float64) {
atomic.StoreUint64(&modelErrorAutoBanRate, math.Float64bits(rate))
}

// GetModelFailDisableTime atomically loads and returns the current value of
// modelFailDisableTime.
func GetModelFailDisableTime() int64 {
return modelFailDisableTime.Load()
}

func SetModelFailDisableTime(time int64) {
modelFailDisableTime.Store(time)
// SetRetryTimes atomically stores times as the retry count that
// GetRetryTimes returns.
func SetRetryTimes(times int64) {
retryTimes.Store(times)
}

func init() {
Expand Down
42 changes: 40 additions & 2 deletions service/aiproxy/controller/channel-test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package controller

import (
"context"
"errors"
"fmt"
"io"
Expand All @@ -19,6 +20,7 @@ import (
"github.com/labring/sealos/service/aiproxy/common/render"
"github.com/labring/sealos/service/aiproxy/middleware"
"github.com/labring/sealos/service/aiproxy/model"
"github.com/labring/sealos/service/aiproxy/monitor"
"github.com/labring/sealos/service/aiproxy/relay/meta"
"github.com/labring/sealos/service/aiproxy/relay/utils"
log "github.com/sirupsen/logrus"
Expand Down Expand Up @@ -51,11 +53,17 @@ func testSingleModel(channel *model.Channel, modelName string) (*model.ChannelTe
meta.WithChannelTest(true),
)
bizErr := relayHelper(meta, newc)
success := bizErr == nil
var respStr string
var code int
if bizErr == nil {
if success {
respStr = w.Body.String()
code = w.Code
log.Infof("model %s(%d) test success, unban it", modelName, channel.ID)
err := monitor.ClearChannelModelErrors(context.Background(), modelName, channel.ID)
if err != nil {
log.Errorf("clear channel errors failed: %+v", err)
}
} else {
respStr = bizErr.String()
code = bizErr.StatusCode
Expand All @@ -67,7 +75,7 @@ func testSingleModel(channel *model.Channel, modelName string) (*model.ChannelTe
meta.ActualModelName,
meta.Mode,
time.Since(meta.RequestAt).Seconds(),
bizErr == nil,
success,
respStr,
code,
)
Expand Down Expand Up @@ -349,3 +357,33 @@ func TestAllChannels(c *gin.Context) {
})
}
}

// AutoTestBannedModels re-tests every (model, channel) pair that
// monitor.GetAllBannedChannels currently reports.
//
// For each pair the channel is loaded by ID and run through testSingleModel.
// Failures are only logged; a pair that cannot be loaded or tested is skipped
// so that one bad entry never aborts the rest of the sweep.
func AutoTestBannedModels() {
	log := log.WithFields(log.Fields{
		"auto_test_banned_models": "true",
	})

	channels, err := monitor.GetAllBannedChannels(context.Background())
	if err != nil {
		log.Errorf("failed to get banned channels: %s", err.Error())
		// The result accompanying a non-nil error is not trustworthy; try
		// again on the next invocation instead of acting on it.
		return
	}
	if len(channels) == 0 {
		return
	}

	for modelName, ids := range channels {
		for _, id := range ids {
			channel, err := model.LoadChannelByID(int(id))
			if err != nil {
				log.Errorf("failed to get channel by model %s: %s", modelName, err.Error())
				continue
			}
			result, err := testSingleModel(channel, modelName)
			if err != nil {
				log.Errorf("failed to test channel %s(%d) model %s: %s", channel.Name, channel.ID, modelName, err.Error())
				// result must not be inspected when the test itself errored:
				// dereferencing a nil result here would panic the caller's
				// goroutine.
				continue
			}
			if result == nil {
				// Defensive: nothing to report without a result.
				continue
			}
			if !result.Success {
				log.Infof("model %s(%d) test failed", modelName, channel.ID)
			}
		}
	}
}
10 changes: 10 additions & 0 deletions service/aiproxy/controller/channel.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ import (
"github.com/gin-gonic/gin"
"github.com/labring/sealos/service/aiproxy/middleware"
"github.com/labring/sealos/service/aiproxy/model"
"github.com/labring/sealos/service/aiproxy/monitor"
"github.com/labring/sealos/service/aiproxy/relay/channeltype"
log "github.com/sirupsen/logrus"
)

func ChannelTypeNames(c *gin.Context) {
Expand Down Expand Up @@ -223,6 +225,10 @@ func UpdateChannel(c *gin.Context) {
middleware.ErrorResponse(c, http.StatusOK, err.Error())
return
}
err = monitor.ClearChannelAllModelErrors(c.Request.Context(), id)
if err != nil {
log.Errorf("failed to clear channel all model errors: %+v", err)
}
middleware.SuccessResponse(c, ch)
}

Expand All @@ -243,5 +249,9 @@ func UpdateChannelStatus(c *gin.Context) {
middleware.ErrorResponse(c, http.StatusOK, err.Error())
return
}
err = monitor.ClearChannelAllModelErrors(c.Request.Context(), id)
if err != nil {
log.Errorf("failed to clear channel all model errors: %+v", err)
}
middleware.SuccessResponse(c, nil)
}
26 changes: 13 additions & 13 deletions service/aiproxy/controller/relay.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"errors"
"io"
"net/http"
"time"

"github.com/gin-gonic/gin"
"github.com/labring/sealos/service/aiproxy/common"
Expand All @@ -18,6 +17,7 @@ import (
"github.com/labring/sealos/service/aiproxy/relay/meta"
"github.com/labring/sealos/service/aiproxy/relay/model"
"github.com/labring/sealos/service/aiproxy/relay/relaymode"
log "github.com/sirupsen/logrus"
)

// https://platform.openai.com/docs/api-reference/chat
Expand All @@ -41,14 +41,22 @@ func relayHelper(meta *meta.Meta, c *gin.Context) *model.ErrorWithStatusCode {
}
}

// RelayHelper runs relayHelper for the given meta and gin context, then
// reports the request to monitor.AddRequest together with a flag that is true
// when the relay returned an error.
//
// A failure to record the request is logged and otherwise ignored; the value
// returned is always the relay's own result.
func RelayHelper(meta *meta.Meta, c *gin.Context) *model.ErrorWithStatusCode {
	bizErr := relayHelper(meta, c)
	failed := bizErr != nil

	recordErr := monitor.AddRequest(
		c.Request.Context(),
		meta.OriginModelName,
		int64(meta.Channel.ID),
		failed,
	)
	if recordErr != nil {
		log.Errorf("add request failed: %+v", recordErr)
	}

	return bizErr
}

func Relay(c *gin.Context) {
log := middleware.GetLogger(c)

requestModel := c.MustGet(string(ctxkey.OriginalModel)).(string)

ids, err := monitor.GetChannelsWithErrors(c.Request.Context(), requestModel, 10*time.Minute, 1)
ids, err := monitor.GetBannedChannels(c.Request.Context(), requestModel)
if err != nil {
log.Errorf("get channels with errors failed: %+v", err)
log.Errorf("get %s auto banned channels failed: %+v", requestModel, err)
}

failedChannelIDs := []int{}
Expand All @@ -69,22 +77,14 @@ func Relay(c *gin.Context) {
}

meta := middleware.NewMetaByContext(c, channel)
bizErr := relayHelper(meta, c)
bizErr := RelayHelper(meta, c)
if bizErr == nil {
err = monitor.ClearChannelErrors(c.Request.Context(), requestModel, channel.ID)
if err != nil {
log.Errorf("clear channel errors failed: %+v", err)
}
return
}
failedChannelIDs = append(failedChannelIDs, channel.ID)
requestID := c.GetString(ctxkey.RequestID)
var retryTimes int64
if shouldRetry(c, bizErr.StatusCode) {
err = monitor.AddError(c.Request.Context(), requestModel, int64(channel.ID), 10*time.Second)
if err != nil {
log.Errorf("add error failed: %+v", err)
}
retryTimes = config.GetRetryTimes()
}
for i := retryTimes; i > 0; i-- {
Expand All @@ -107,7 +107,7 @@ func Relay(c *gin.Context) {
}
c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody))
meta.Reset(newChannel)
bizErr = relayHelper(meta, c)
bizErr = RelayHelper(meta, c)
if bizErr == nil {
return
}
Expand Down
13 changes: 13 additions & 0 deletions service/aiproxy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/labring/sealos/service/aiproxy/common"
"github.com/labring/sealos/service/aiproxy/common/balance"
"github.com/labring/sealos/service/aiproxy/common/config"
"github.com/labring/sealos/service/aiproxy/controller"
"github.com/labring/sealos/service/aiproxy/middleware"
"github.com/labring/sealos/service/aiproxy/model"
relaycontroller "github.com/labring/sealos/service/aiproxy/relay/controller"
Expand Down Expand Up @@ -137,6 +138,16 @@ func setupHTTPServer() (*http.Server, *gin.Engine) {
}, server
}

// autoTestBannedModels logs a start message and then invokes
// controller.AutoTestBannedModels on every tick of a 15-second ticker.
//
// The loop has no exit condition of its own, so the function is meant to be
// run in its own goroutine.
func autoTestBannedModels() {
	log.Info("auto test banned models start")

	const interval = 15 * time.Second
	tick := time.NewTicker(interval)
	defer tick.Stop()

	for {
		// Block until the next tick, then run one sweep.
		<-tick.C
		controller.AutoTestBannedModels()
	}
}

func main() {
if err := initializeServices(); err != nil {
log.Fatal("failed to initialize services: " + err.Error())
Expand All @@ -163,6 +174,8 @@ func main() {
}
}()

go autoTestBannedModels()

<-ctx.Done()
log.Info("shutting down server...")
log.Info("max wait time: 120s")
Expand Down
7 changes: 7 additions & 0 deletions service/aiproxy/model/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func InitOption2DB() error {
OptionMap["ApproximateTokenEnabled"] = strconv.FormatBool(config.GetApproximateTokenEnabled())
OptionMap["BillingEnabled"] = strconv.FormatBool(config.GetBillingEnabled())
OptionMap["RetryTimes"] = strconv.FormatInt(config.GetRetryTimes(), 10)
OptionMap["ModelErrorAutoBanRate"] = strconv.FormatFloat(config.GetModelErrorAutoBanRate(), 'f', -1, 64)
timeoutWithModelTypeJSON, _ := json.Marshal(config.GetTimeoutWithModelType())
OptionMap["TimeoutWithModelType"] = conv.BytesToString(timeoutWithModelTypeJSON)
OptionMap["GlobalApiRateLimitNum"] = strconv.FormatInt(config.GetGlobalAPIRateLimitNum(), 10)
Expand Down Expand Up @@ -226,6 +227,12 @@ func updateOption(key string, value string, isInit bool) (err error) {
return err
}
config.SetRetryTimes(retryTimes)
case "ModelErrorAutoBanRate":
modelErrorAutoBanRate, err := strconv.ParseFloat(value, 64)
if err != nil {
return err
}
config.SetModelErrorAutoBanRate(modelErrorAutoBanRate)
case "TimeoutWithModelType":
var newTimeoutWithModelType map[int]int64
err := json.Unmarshal(conv.StringToBytes(value), &newTimeoutWithModelType)
Expand Down
Loading

0 comments on commit 674a6ca

Please sign in to comment.