relay.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637
  1. package controller
  2. import (
  3. "errors"
  4. "fmt"
  5. "io"
  6. "log"
  7. "net/http"
  8. "strings"
  9. "time"
  10. "github.com/QuantumNous/new-api/common"
  11. "github.com/QuantumNous/new-api/constant"
  12. "github.com/QuantumNous/new-api/dto"
  13. "github.com/QuantumNous/new-api/logger"
  14. "github.com/QuantumNous/new-api/middleware"
  15. "github.com/QuantumNous/new-api/model"
  16. "github.com/QuantumNous/new-api/relay"
  17. relaycommon "github.com/QuantumNous/new-api/relay/common"
  18. relayconstant "github.com/QuantumNous/new-api/relay/constant"
  19. "github.com/QuantumNous/new-api/relay/helper"
  20. "github.com/QuantumNous/new-api/service"
  21. "github.com/QuantumNous/new-api/setting"
  22. "github.com/QuantumNous/new-api/setting/operation_setting"
  23. "github.com/QuantumNous/new-api/types"
  24. "github.com/bytedance/gopkg/util/gopool"
  25. "github.com/gin-gonic/gin"
  26. "github.com/gorilla/websocket"
  27. )
  28. func relayHandler(c *gin.Context, info *relaycommon.RelayInfo) *types.NewAPIError {
  29. var err *types.NewAPIError
  30. switch info.RelayMode {
  31. case relayconstant.RelayModeImagesGenerations, relayconstant.RelayModeImagesEdits:
  32. err = relay.ImageHelper(c, info)
  33. case relayconstant.RelayModeAudioSpeech:
  34. fallthrough
  35. case relayconstant.RelayModeAudioTranslation:
  36. fallthrough
  37. case relayconstant.RelayModeAudioTranscription:
  38. err = relay.AudioHelper(c, info)
  39. case relayconstant.RelayModeRerank:
  40. err = relay.RerankHelper(c, info)
  41. case relayconstant.RelayModeEmbeddings:
  42. err = relay.EmbeddingHelper(c, info)
  43. case relayconstant.RelayModeResponses, relayconstant.RelayModeResponsesCompact:
  44. err = relay.ResponsesHelper(c, info)
  45. default:
  46. err = relay.TextHelper(c, info)
  47. }
  48. return err
  49. }
  50. func geminiRelayHandler(c *gin.Context, info *relaycommon.RelayInfo) *types.NewAPIError {
  51. var err *types.NewAPIError
  52. if strings.Contains(c.Request.URL.Path, "embed") {
  53. err = relay.GeminiEmbeddingHandler(c, info)
  54. } else {
  55. err = relay.GeminiHelper(c, info)
  56. }
  57. return err
  58. }
  59. func Relay(c *gin.Context, relayFormat types.RelayFormat) {
  60. requestId := c.GetString(common.RequestIdKey)
  61. //group := common.GetContextKeyString(c, constant.ContextKeyUsingGroup)
  62. //originalModel := common.GetContextKeyString(c, constant.ContextKeyOriginalModel)
  63. var (
  64. newAPIError *types.NewAPIError
  65. ws *websocket.Conn
  66. )
  67. if relayFormat == types.RelayFormatOpenAIRealtime {
  68. var err error
  69. ws, err = upgrader.Upgrade(c.Writer, c.Request, nil)
  70. if err != nil {
  71. helper.WssError(c, ws, types.NewError(err, types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry()).ToOpenAIError())
  72. return
  73. }
  74. defer ws.Close()
  75. }
  76. defer func() {
  77. if newAPIError != nil {
  78. logger.LogError(c, fmt.Sprintf("relay error: %s", newAPIError.Error()))
  79. newAPIError.SetMessage(common.MessageWithRequestId(newAPIError.Error(), requestId))
  80. switch relayFormat {
  81. case types.RelayFormatOpenAIRealtime:
  82. helper.WssError(c, ws, newAPIError.ToOpenAIError())
  83. case types.RelayFormatClaude:
  84. c.JSON(newAPIError.StatusCode, gin.H{
  85. "type": "error",
  86. "error": newAPIError.ToClaudeError(),
  87. })
  88. default:
  89. c.JSON(newAPIError.StatusCode, gin.H{
  90. "error": newAPIError.ToOpenAIError(),
  91. })
  92. }
  93. }
  94. }()
  95. request, err := helper.GetAndValidateRequest(c, relayFormat)
  96. if err != nil {
  97. // Map "request body too large" to 413 so clients can handle it correctly
  98. if common.IsRequestBodyTooLargeError(err) || errors.Is(err, common.ErrRequestBodyTooLarge) {
  99. newAPIError = types.NewErrorWithStatusCode(err, types.ErrorCodeReadRequestBodyFailed, http.StatusRequestEntityTooLarge, types.ErrOptionWithSkipRetry())
  100. } else {
  101. newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
  102. }
  103. return
  104. }
  105. relayInfo, err := relaycommon.GenRelayInfo(c, relayFormat, request, ws)
  106. if err != nil {
  107. newAPIError = types.NewError(err, types.ErrorCodeGenRelayInfoFailed)
  108. return
  109. }
  110. needSensitiveCheck := setting.ShouldCheckPromptSensitive()
  111. needCountToken := constant.CountToken
  112. // Avoid building huge CombineText (strings.Join) when token counting and sensitive check are both disabled.
  113. var meta *types.TokenCountMeta
  114. if needSensitiveCheck || needCountToken {
  115. meta = request.GetTokenCountMeta()
  116. } else {
  117. meta = fastTokenCountMetaForPricing(request)
  118. }
  119. if needSensitiveCheck && meta != nil {
  120. contains, words := service.CheckSensitiveText(meta.CombineText)
  121. if contains {
  122. logger.LogWarn(c, fmt.Sprintf("user sensitive words detected: %s", strings.Join(words, ", ")))
  123. newAPIError = types.NewError(err, types.ErrorCodeSensitiveWordsDetected)
  124. return
  125. }
  126. }
  127. tokens, err := service.EstimateRequestToken(c, meta, relayInfo)
  128. if err != nil {
  129. newAPIError = types.NewError(err, types.ErrorCodeCountTokenFailed)
  130. return
  131. }
  132. relayInfo.SetEstimatePromptTokens(tokens)
  133. priceData, err := helper.ModelPriceHelper(c, relayInfo, tokens, meta)
  134. if err != nil {
  135. newAPIError = types.NewError(err, types.ErrorCodeModelPriceError)
  136. return
  137. }
  138. // common.SetContextKey(c, constant.ContextKeyTokenCountMeta, meta)
  139. if priceData.FreeModel {
  140. logger.LogInfo(c, fmt.Sprintf("模型 %s 免费,跳过预扣费", relayInfo.OriginModelName))
  141. } else {
  142. newAPIError = service.PreConsumeBilling(c, priceData.QuotaToPreConsume, relayInfo)
  143. if newAPIError != nil {
  144. return
  145. }
  146. }
  147. defer func() {
  148. // Only return quota if downstream failed and quota was actually pre-consumed
  149. if newAPIError != nil {
  150. newAPIError = service.NormalizeViolationFeeError(newAPIError)
  151. if relayInfo.Billing != nil {
  152. relayInfo.Billing.Refund(c)
  153. }
  154. service.ChargeViolationFeeIfNeeded(c, relayInfo, newAPIError)
  155. }
  156. }()
  157. retryParam := &service.RetryParam{
  158. Ctx: c,
  159. TokenGroup: relayInfo.TokenGroup,
  160. ModelName: relayInfo.OriginModelName,
  161. Retry: common.GetPointer(0),
  162. }
  163. for ; retryParam.GetRetry() <= common.RetryTimes; retryParam.IncreaseRetry() {
  164. channel, channelErr := getChannel(c, relayInfo, retryParam)
  165. if channelErr != nil {
  166. logger.LogError(c, channelErr.Error())
  167. newAPIError = channelErr
  168. break
  169. }
  170. addUsedChannel(c, channel.Id)
  171. bodyStorage, bodyErr := common.GetBodyStorage(c)
  172. if bodyErr != nil {
  173. // Ensure consistent 413 for oversized bodies even when error occurs later (e.g., retry path)
  174. if common.IsRequestBodyTooLargeError(bodyErr) || errors.Is(bodyErr, common.ErrRequestBodyTooLarge) {
  175. newAPIError = types.NewErrorWithStatusCode(bodyErr, types.ErrorCodeReadRequestBodyFailed, http.StatusRequestEntityTooLarge, types.ErrOptionWithSkipRetry())
  176. } else {
  177. newAPIError = types.NewErrorWithStatusCode(bodyErr, types.ErrorCodeReadRequestBodyFailed, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
  178. }
  179. break
  180. }
  181. c.Request.Body = io.NopCloser(bodyStorage)
  182. switch relayFormat {
  183. case types.RelayFormatOpenAIRealtime:
  184. newAPIError = relay.WssHelper(c, relayInfo)
  185. case types.RelayFormatClaude:
  186. newAPIError = relay.ClaudeHelper(c, relayInfo)
  187. case types.RelayFormatGemini:
  188. newAPIError = geminiRelayHandler(c, relayInfo)
  189. default:
  190. newAPIError = relayHandler(c, relayInfo)
  191. }
  192. if newAPIError == nil {
  193. return
  194. }
  195. newAPIError = service.NormalizeViolationFeeError(newAPIError)
  196. processChannelError(c, *types.NewChannelError(channel.Id, channel.Type, channel.Name, channel.ChannelInfo.IsMultiKey, common.GetContextKeyString(c, constant.ContextKeyChannelKey), channel.GetAutoBan()), newAPIError)
  197. if !shouldRetry(c, newAPIError, common.RetryTimes-retryParam.GetRetry()) {
  198. break
  199. }
  200. }
  201. useChannel := c.GetStringSlice("use_channel")
  202. if len(useChannel) > 1 {
  203. retryLogStr := fmt.Sprintf("重试:%s", strings.Trim(strings.Join(strings.Fields(fmt.Sprint(useChannel)), "->"), "[]"))
  204. logger.LogInfo(c, retryLogStr)
  205. }
  206. }
  207. var upgrader = websocket.Upgrader{
  208. Subprotocols: []string{"realtime"}, // WS 握手支持的协议,如果有使用 Sec-WebSocket-Protocol,则必须在此声明对应的 Protocol TODO add other protocol
  209. CheckOrigin: func(r *http.Request) bool {
  210. return true // 允许跨域
  211. },
  212. }
  213. func addUsedChannel(c *gin.Context, channelId int) {
  214. useChannel := c.GetStringSlice("use_channel")
  215. useChannel = append(useChannel, fmt.Sprintf("%d", channelId))
  216. c.Set("use_channel", useChannel)
  217. }
  218. func fastTokenCountMetaForPricing(request dto.Request) *types.TokenCountMeta {
  219. if request == nil {
  220. return &types.TokenCountMeta{}
  221. }
  222. meta := &types.TokenCountMeta{
  223. TokenType: types.TokenTypeTokenizer,
  224. }
  225. switch r := request.(type) {
  226. case *dto.GeneralOpenAIRequest:
  227. if r.MaxCompletionTokens > r.MaxTokens {
  228. meta.MaxTokens = int(r.MaxCompletionTokens)
  229. } else {
  230. meta.MaxTokens = int(r.MaxTokens)
  231. }
  232. case *dto.OpenAIResponsesRequest:
  233. meta.MaxTokens = int(r.MaxOutputTokens)
  234. case *dto.ClaudeRequest:
  235. meta.MaxTokens = int(r.MaxTokens)
  236. case *dto.ImageRequest:
  237. // Pricing for image requests depends on ImagePriceRatio; safe to compute even when CountToken is disabled.
  238. return r.GetTokenCountMeta()
  239. default:
  240. // Best-effort: leave CombineText empty to avoid large allocations.
  241. }
  242. return meta
  243. }
  244. func getChannel(c *gin.Context, info *relaycommon.RelayInfo, retryParam *service.RetryParam) (*model.Channel, *types.NewAPIError) {
  245. if info.ChannelMeta == nil {
  246. autoBan := c.GetBool("auto_ban")
  247. autoBanInt := 1
  248. if !autoBan {
  249. autoBanInt = 0
  250. }
  251. return &model.Channel{
  252. Id: c.GetInt("channel_id"),
  253. Type: c.GetInt("channel_type"),
  254. Name: c.GetString("channel_name"),
  255. AutoBan: &autoBanInt,
  256. }, nil
  257. }
  258. channel, selectGroup, err := service.CacheGetRandomSatisfiedChannel(retryParam)
  259. info.PriceData.GroupRatioInfo = helper.HandleGroupRatio(c, info)
  260. if err != nil {
  261. return nil, types.NewError(fmt.Errorf("获取分组 %s 下模型 %s 的可用渠道失败(retry): %s", selectGroup, info.OriginModelName, err.Error()), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
  262. }
  263. if channel == nil {
  264. return nil, types.NewError(fmt.Errorf("分组 %s 下模型 %s 的可用渠道不存在(retry)", selectGroup, info.OriginModelName), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
  265. }
  266. newAPIError := middleware.SetupContextForSelectedChannel(c, channel, info.OriginModelName)
  267. if newAPIError != nil {
  268. return nil, newAPIError
  269. }
  270. return channel, nil
  271. }
  272. func shouldRetry(c *gin.Context, openaiErr *types.NewAPIError, retryTimes int) bool {
  273. if openaiErr == nil {
  274. return false
  275. }
  276. if service.ShouldSkipRetryAfterChannelAffinityFailure(c) {
  277. return false
  278. }
  279. if types.IsChannelError(openaiErr) {
  280. return true
  281. }
  282. if types.IsSkipRetryError(openaiErr) {
  283. return false
  284. }
  285. if retryTimes <= 0 {
  286. return false
  287. }
  288. if _, ok := c.Get("specific_channel_id"); ok {
  289. return false
  290. }
  291. code := openaiErr.StatusCode
  292. if code >= 200 && code < 300 {
  293. return false
  294. }
  295. if code < 100 || code > 599 {
  296. return true
  297. }
  298. return operation_setting.ShouldRetryByStatusCode(code)
  299. }
  300. func processChannelError(c *gin.Context, channelError types.ChannelError, err *types.NewAPIError) {
  301. logger.LogError(c, fmt.Sprintf("channel error (channel #%d, status code: %d): %s", channelError.ChannelId, err.StatusCode, err.Error()))
  302. // 不要使用context获取渠道信息,异步处理时可能会出现渠道信息不一致的情况
  303. // do not use context to get channel info, there may be inconsistent channel info when processing asynchronously
  304. if service.ShouldDisableChannel(channelError.ChannelType, err) && channelError.AutoBan {
  305. gopool.Go(func() {
  306. service.DisableChannel(channelError, err.ErrorWithStatusCode())
  307. })
  308. }
  309. if constant.ErrorLogEnabled && types.IsRecordErrorLog(err) {
  310. // 保存错误日志到mysql中
  311. userId := c.GetInt("id")
  312. tokenName := c.GetString("token_name")
  313. modelName := c.GetString("original_model")
  314. tokenId := c.GetInt("token_id")
  315. userGroup := c.GetString("group")
  316. channelId := c.GetInt("channel_id")
  317. other := make(map[string]interface{})
  318. if c.Request != nil && c.Request.URL != nil {
  319. other["request_path"] = c.Request.URL.Path
  320. }
  321. other["error_type"] = err.GetErrorType()
  322. other["error_code"] = err.GetErrorCode()
  323. other["status_code"] = err.StatusCode
  324. other["channel_id"] = channelId
  325. other["channel_name"] = c.GetString("channel_name")
  326. other["channel_type"] = c.GetInt("channel_type")
  327. adminInfo := make(map[string]interface{})
  328. adminInfo["use_channel"] = c.GetStringSlice("use_channel")
  329. isMultiKey := common.GetContextKeyBool(c, constant.ContextKeyChannelIsMultiKey)
  330. if isMultiKey {
  331. adminInfo["is_multi_key"] = true
  332. adminInfo["multi_key_index"] = common.GetContextKeyInt(c, constant.ContextKeyChannelMultiKeyIndex)
  333. }
  334. service.AppendChannelAffinityAdminInfo(c, adminInfo)
  335. other["admin_info"] = adminInfo
  336. startTime := common.GetContextKeyTime(c, constant.ContextKeyRequestStartTime)
  337. if startTime.IsZero() {
  338. startTime = time.Now()
  339. }
  340. useTimeSeconds := int(time.Since(startTime).Seconds())
  341. model.RecordErrorLog(c, userId, channelId, modelName, tokenName, err.MaskSensitiveErrorWithStatusCode(), tokenId, useTimeSeconds, false, userGroup, other)
  342. }
  343. }
  344. func RelayMidjourney(c *gin.Context) {
  345. relayInfo, err := relaycommon.GenRelayInfo(c, types.RelayFormatMjProxy, nil, nil)
  346. if err != nil {
  347. c.JSON(http.StatusInternalServerError, gin.H{
  348. "description": fmt.Sprintf("failed to generate relay info: %s", err.Error()),
  349. "type": "upstream_error",
  350. "code": 4,
  351. })
  352. return
  353. }
  354. var mjErr *dto.MidjourneyResponse
  355. switch relayInfo.RelayMode {
  356. case relayconstant.RelayModeMidjourneyNotify:
  357. mjErr = relay.RelayMidjourneyNotify(c)
  358. case relayconstant.RelayModeMidjourneyTaskFetch, relayconstant.RelayModeMidjourneyTaskFetchByCondition:
  359. mjErr = relay.RelayMidjourneyTask(c, relayInfo.RelayMode)
  360. case relayconstant.RelayModeMidjourneyTaskImageSeed:
  361. mjErr = relay.RelayMidjourneyTaskImageSeed(c)
  362. case relayconstant.RelayModeSwapFace:
  363. mjErr = relay.RelaySwapFace(c, relayInfo)
  364. default:
  365. mjErr = relay.RelayMidjourneySubmit(c, relayInfo)
  366. }
  367. //err = relayMidjourneySubmit(c, relayMode)
  368. log.Println(mjErr)
  369. if mjErr != nil {
  370. statusCode := http.StatusBadRequest
  371. if mjErr.Code == 30 {
  372. mjErr.Result = "当前分组负载已饱和,请稍后再试,或升级账户以提升服务质量。"
  373. statusCode = http.StatusTooManyRequests
  374. }
  375. c.JSON(statusCode, gin.H{
  376. "description": fmt.Sprintf("%s %s", mjErr.Description, mjErr.Result),
  377. "type": "upstream_error",
  378. "code": mjErr.Code,
  379. })
  380. channelId := c.GetInt("channel_id")
  381. logger.LogError(c, fmt.Sprintf("relay error (channel #%d, status code %d): %s", channelId, statusCode, fmt.Sprintf("%s %s", mjErr.Description, mjErr.Result)))
  382. }
  383. }
  384. func RelayNotImplemented(c *gin.Context) {
  385. err := types.OpenAIError{
  386. Message: "API not implemented",
  387. Type: "new_api_error",
  388. Param: "",
  389. Code: "api_not_implemented",
  390. }
  391. c.JSON(http.StatusNotImplemented, gin.H{
  392. "error": err,
  393. })
  394. }
  395. func RelayNotFound(c *gin.Context) {
  396. err := types.OpenAIError{
  397. Message: fmt.Sprintf("Invalid URL (%s %s)", c.Request.Method, c.Request.URL.Path),
  398. Type: "invalid_request_error",
  399. Param: "",
  400. Code: "",
  401. }
  402. c.JSON(http.StatusNotFound, gin.H{
  403. "error": err,
  404. })
  405. }
  406. func RelayTask(c *gin.Context) {
  407. channelId := c.GetInt("channel_id")
  408. c.Set("use_channel", []string{fmt.Sprintf("%d", channelId)})
  409. relayInfo, err := relaycommon.GenRelayInfo(c, types.RelayFormatTask, nil, nil)
  410. if err != nil {
  411. c.JSON(http.StatusInternalServerError, &dto.TaskError{
  412. Code: "gen_relay_info_failed",
  413. Message: err.Error(),
  414. StatusCode: http.StatusInternalServerError,
  415. })
  416. return
  417. }
  418. // Fetch 操作是纯 DB 查询(或 task 自带 channelId 的上游查询),不依赖上下文 channel,无需重试
  419. // TODO: 在video-route层面优化,避免无谓的 channel 选择和上下文设置,也没必要吧代码放到这里来写这么多屎山
  420. switch relayInfo.RelayMode {
  421. case relayconstant.RelayModeSunoFetch, relayconstant.RelayModeSunoFetchByID, relayconstant.RelayModeVideoFetchByID:
  422. if taskErr := relay.RelayTaskFetch(c, relayInfo.RelayMode); taskErr != nil {
  423. respondTaskError(c, taskErr)
  424. }
  425. return
  426. }
  427. // ── Submit 路径 ─────────────────────────────────────────────────
  428. // 1. 解析原始任务(remix / continuation),一次性,可能锁定渠道并禁止重试
  429. if taskErr := relay.ResolveOriginTask(c, relayInfo); taskErr != nil {
  430. respondTaskError(c, taskErr)
  431. return
  432. }
  433. // 2. defer Refund(全部失败时回滚预扣费)
  434. var result *relay.TaskSubmitResult
  435. var taskErr *dto.TaskError
  436. defer func() {
  437. if taskErr != nil && relayInfo.Billing != nil {
  438. relayInfo.Billing.Refund(c)
  439. }
  440. }()
  441. // 3. 执行 + 重试(RelayTaskSubmit 内部在首次调用时自动预扣费)
  442. taskErr = taskSubmitWithRetry(c, relayInfo, channelId, common.RetryTimes, func() *dto.TaskError {
  443. var te *dto.TaskError
  444. result, te = relay.RelayTaskSubmit(c, relayInfo)
  445. return te
  446. })
  447. // 4. 成功:结算 + 日志 + 插入任务
  448. if taskErr == nil {
  449. if settleErr := service.SettleBilling(c, relayInfo, result.Quota); settleErr != nil {
  450. common.SysError("settle task billing error: " + settleErr.Error())
  451. }
  452. service.LogTaskConsumption(c, relayInfo, result.ModelName)
  453. task := model.InitTask(result.Platform, relayInfo)
  454. task.PrivateData.UpstreamTaskID = result.UpstreamTaskID
  455. task.PrivateData.BillingSource = relayInfo.BillingSource
  456. task.PrivateData.SubscriptionId = relayInfo.SubscriptionId
  457. task.PrivateData.TokenId = relayInfo.TokenId
  458. task.Quota = result.Quota
  459. task.Data = result.TaskData
  460. task.Action = relayInfo.Action
  461. if insertErr := task.Insert(); insertErr != nil {
  462. //taskErr = service.TaskErrorWrapper(insertErr, "insert_task_failed", http.StatusInternalServerError)
  463. common.SysError("insert task error: " + insertErr.Error())
  464. }
  465. }
  466. if taskErr != nil {
  467. respondTaskError(c, taskErr)
  468. }
  469. }
  470. // respondTaskError 统一输出 Task 错误响应(含 429 限流提示改写)
  471. func respondTaskError(c *gin.Context, taskErr *dto.TaskError) {
  472. if taskErr.StatusCode == http.StatusTooManyRequests {
  473. taskErr.Message = "当前分组上游负载已饱和,请稍后再试"
  474. }
  475. c.JSON(taskErr.StatusCode, taskErr)
  476. }
  477. // taskSubmitWithRetry 执行首次尝试并在失败时切换渠道重试,返回最终的 taskErr。
  478. // attempt 闭包负责实际的上游请求,不涉及计费。
  479. func taskSubmitWithRetry(c *gin.Context, relayInfo *relaycommon.RelayInfo,
  480. channelId int, retryTimes int, attempt func() *dto.TaskError) *dto.TaskError {
  481. taskErr := attempt()
  482. if taskErr == nil {
  483. return nil
  484. }
  485. if !taskErr.LocalError {
  486. processChannelError(c,
  487. *types.NewChannelError(channelId, c.GetInt("channel_type"), c.GetString("channel_name"), common.GetContextKeyBool(c, constant.ContextKeyChannelIsMultiKey),
  488. common.GetContextKeyString(c, constant.ContextKeyChannelKey), common.GetContextKeyBool(c, constant.ContextKeyChannelAutoBan)),
  489. types.NewOpenAIError(taskErr.Error, types.ErrorCodeBadResponseStatusCode, taskErr.StatusCode))
  490. }
  491. retryParam := &service.RetryParam{
  492. Ctx: c,
  493. TokenGroup: relayInfo.TokenGroup,
  494. ModelName: relayInfo.OriginModelName,
  495. Retry: common.GetPointer(0),
  496. }
  497. for ; shouldRetryTaskRelay(c, channelId, taskErr, retryTimes) && retryParam.GetRetry() < retryTimes; retryParam.IncreaseRetry() {
  498. channel, newAPIError := getChannel(c, relayInfo, retryParam)
  499. if newAPIError != nil {
  500. logger.LogError(c, fmt.Sprintf("CacheGetRandomSatisfiedChannel failed: %s", newAPIError.Error()))
  501. taskErr = service.TaskErrorWrapperLocal(newAPIError.Err, "get_channel_failed", http.StatusInternalServerError)
  502. break
  503. }
  504. channelId = channel.Id
  505. useChannel := c.GetStringSlice("use_channel")
  506. useChannel = append(useChannel, fmt.Sprintf("%d", channelId))
  507. c.Set("use_channel", useChannel)
  508. logger.LogInfo(c, fmt.Sprintf("using channel #%d to retry (remain times %d)", channel.Id, retryParam.GetRetry()))
  509. middleware.SetupContextForSelectedChannel(c, channel, c.GetString("original_model"))
  510. bodyStorage, err := common.GetBodyStorage(c)
  511. if err != nil {
  512. if common.IsRequestBodyTooLargeError(err) || errors.Is(err, common.ErrRequestBodyTooLarge) {
  513. taskErr = service.TaskErrorWrapperLocal(err, "read_request_body_failed", http.StatusRequestEntityTooLarge)
  514. } else {
  515. taskErr = service.TaskErrorWrapperLocal(err, "read_request_body_failed", http.StatusBadRequest)
  516. }
  517. break
  518. }
  519. c.Request.Body = io.NopCloser(bodyStorage)
  520. taskErr = attempt()
  521. if taskErr != nil && !taskErr.LocalError {
  522. processChannelError(c,
  523. *types.NewChannelError(channel.Id, channel.Type, channel.Name, channel.ChannelInfo.IsMultiKey,
  524. common.GetContextKeyString(c, constant.ContextKeyChannelKey), channel.GetAutoBan()),
  525. types.NewOpenAIError(taskErr.Error, types.ErrorCodeBadResponseStatusCode, taskErr.StatusCode))
  526. }
  527. }
  528. useChannel := c.GetStringSlice("use_channel")
  529. if len(useChannel) > 1 {
  530. retryLogStr := fmt.Sprintf("重试:%s", strings.Trim(strings.Join(strings.Fields(fmt.Sprint(useChannel)), "->"), "[]"))
  531. logger.LogInfo(c, retryLogStr)
  532. }
  533. return taskErr
  534. }
  535. func shouldRetryTaskRelay(c *gin.Context, channelId int, taskErr *dto.TaskError, retryTimes int) bool {
  536. if taskErr == nil {
  537. return false
  538. }
  539. if service.ShouldSkipRetryAfterChannelAffinityFailure(c) {
  540. return false
  541. }
  542. if retryTimes <= 0 {
  543. return false
  544. }
  545. if _, ok := c.Get("specific_channel_id"); ok {
  546. return false
  547. }
  548. if taskErr.StatusCode == http.StatusTooManyRequests {
  549. return true
  550. }
  551. if taskErr.StatusCode == 307 {
  552. return true
  553. }
  554. if taskErr.StatusCode/100 == 5 {
  555. // 超时不重试
  556. if taskErr.StatusCode == 504 || taskErr.StatusCode == 524 {
  557. return false
  558. }
  559. return true
  560. }
  561. if taskErr.StatusCode == http.StatusBadRequest {
  562. return false
  563. }
  564. if taskErr.StatusCode == 408 {
  565. // azure处理超时不重试
  566. return false
  567. }
  568. if taskErr.LocalError {
  569. return false
  570. }
  571. if taskErr.StatusCode/100 == 2 {
  572. return false
  573. }
  574. return true
  575. }