relay.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. package controller
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "log"
  8. "net/http"
  9. "strings"
  10. "github.com/QuantumNous/new-api/common"
  11. "github.com/QuantumNous/new-api/constant"
  12. "github.com/QuantumNous/new-api/dto"
  13. "github.com/QuantumNous/new-api/logger"
  14. "github.com/QuantumNous/new-api/middleware"
  15. "github.com/QuantumNous/new-api/model"
  16. "github.com/QuantumNous/new-api/relay"
  17. relaycommon "github.com/QuantumNous/new-api/relay/common"
  18. relayconstant "github.com/QuantumNous/new-api/relay/constant"
  19. "github.com/QuantumNous/new-api/relay/helper"
  20. "github.com/QuantumNous/new-api/service"
  21. "github.com/QuantumNous/new-api/setting"
  22. "github.com/QuantumNous/new-api/types"
  23. "github.com/bytedance/gopkg/util/gopool"
  24. "github.com/gin-gonic/gin"
  25. "github.com/gorilla/websocket"
  26. )
  27. func relayHandler(c *gin.Context, info *relaycommon.RelayInfo) *types.NewAPIError {
  28. var err *types.NewAPIError
  29. switch info.RelayMode {
  30. case relayconstant.RelayModeImagesGenerations, relayconstant.RelayModeImagesEdits:
  31. err = relay.ImageHelper(c, info)
  32. case relayconstant.RelayModeAudioSpeech:
  33. fallthrough
  34. case relayconstant.RelayModeAudioTranslation:
  35. fallthrough
  36. case relayconstant.RelayModeAudioTranscription:
  37. err = relay.AudioHelper(c, info)
  38. case relayconstant.RelayModeRerank:
  39. err = relay.RerankHelper(c, info)
  40. case relayconstant.RelayModeEmbeddings:
  41. err = relay.EmbeddingHelper(c, info)
  42. case relayconstant.RelayModeResponses:
  43. err = relay.ResponsesHelper(c, info)
  44. default:
  45. err = relay.TextHelper(c, info)
  46. }
  47. return err
  48. }
  49. func geminiRelayHandler(c *gin.Context, info *relaycommon.RelayInfo) *types.NewAPIError {
  50. var err *types.NewAPIError
  51. if strings.Contains(c.Request.URL.Path, "embed") {
  52. err = relay.GeminiEmbeddingHandler(c, info)
  53. } else {
  54. err = relay.GeminiHelper(c, info)
  55. }
  56. return err
  57. }
  58. func Relay(c *gin.Context, relayFormat types.RelayFormat) {
  59. requestId := c.GetString(common.RequestIdKey)
  60. //group := common.GetContextKeyString(c, constant.ContextKeyUsingGroup)
  61. //originalModel := common.GetContextKeyString(c, constant.ContextKeyOriginalModel)
  62. var (
  63. newAPIError *types.NewAPIError
  64. ws *websocket.Conn
  65. )
  66. if relayFormat == types.RelayFormatOpenAIRealtime {
  67. var err error
  68. ws, err = upgrader.Upgrade(c.Writer, c.Request, nil)
  69. if err != nil {
  70. helper.WssError(c, ws, types.NewError(err, types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry()).ToOpenAIError())
  71. return
  72. }
  73. defer ws.Close()
  74. }
  75. defer func() {
  76. if newAPIError != nil {
  77. logger.LogError(c, fmt.Sprintf("relay error: %s", newAPIError.Error()))
  78. newAPIError.SetMessage(common.MessageWithRequestId(newAPIError.Error(), requestId))
  79. switch relayFormat {
  80. case types.RelayFormatOpenAIRealtime:
  81. helper.WssError(c, ws, newAPIError.ToOpenAIError())
  82. case types.RelayFormatClaude:
  83. c.JSON(newAPIError.StatusCode, gin.H{
  84. "type": "error",
  85. "error": newAPIError.ToClaudeError(),
  86. })
  87. default:
  88. c.JSON(newAPIError.StatusCode, gin.H{
  89. "error": newAPIError.ToOpenAIError(),
  90. })
  91. }
  92. }
  93. }()
  94. request, err := helper.GetAndValidateRequest(c, relayFormat)
  95. if err != nil {
  96. // Map "request body too large" to 413 so clients can handle it correctly
  97. if common.IsRequestBodyTooLargeError(err) || errors.Is(err, common.ErrRequestBodyTooLarge) {
  98. newAPIError = types.NewErrorWithStatusCode(err, types.ErrorCodeReadRequestBodyFailed, http.StatusRequestEntityTooLarge, types.ErrOptionWithSkipRetry())
  99. } else {
  100. newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
  101. }
  102. return
  103. }
  104. relayInfo, err := relaycommon.GenRelayInfo(c, relayFormat, request, ws)
  105. if err != nil {
  106. newAPIError = types.NewError(err, types.ErrorCodeGenRelayInfoFailed)
  107. return
  108. }
  109. needSensitiveCheck := setting.ShouldCheckPromptSensitive()
  110. needCountToken := constant.CountToken
  111. // Avoid building huge CombineText (strings.Join) when token counting and sensitive check are both disabled.
  112. var meta *types.TokenCountMeta
  113. if needSensitiveCheck || needCountToken {
  114. meta = request.GetTokenCountMeta()
  115. } else {
  116. meta = fastTokenCountMetaForPricing(request)
  117. }
  118. if needSensitiveCheck && meta != nil {
  119. contains, words := service.CheckSensitiveText(meta.CombineText)
  120. if contains {
  121. logger.LogWarn(c, fmt.Sprintf("user sensitive words detected: %s", strings.Join(words, ", ")))
  122. newAPIError = types.NewError(err, types.ErrorCodeSensitiveWordsDetected)
  123. return
  124. }
  125. }
  126. tokens, err := service.EstimateRequestToken(c, meta, relayInfo)
  127. if err != nil {
  128. newAPIError = types.NewError(err, types.ErrorCodeCountTokenFailed)
  129. return
  130. }
  131. relayInfo.SetEstimatePromptTokens(tokens)
  132. priceData, err := helper.ModelPriceHelper(c, relayInfo, tokens, meta)
  133. if err != nil {
  134. newAPIError = types.NewError(err, types.ErrorCodeModelPriceError)
  135. return
  136. }
  137. // common.SetContextKey(c, constant.ContextKeyTokenCountMeta, meta)
  138. if priceData.FreeModel {
  139. logger.LogInfo(c, fmt.Sprintf("模型 %s 免费,跳过预扣费", relayInfo.OriginModelName))
  140. } else {
  141. newAPIError = service.PreConsumeQuota(c, priceData.QuotaToPreConsume, relayInfo)
  142. if newAPIError != nil {
  143. return
  144. }
  145. }
  146. defer func() {
  147. // Only return quota if downstream failed and quota was actually pre-consumed
  148. if newAPIError != nil && relayInfo.FinalPreConsumedQuota != 0 {
  149. service.ReturnPreConsumedQuota(c, relayInfo)
  150. }
  151. }()
  152. retryParam := &service.RetryParam{
  153. Ctx: c,
  154. TokenGroup: relayInfo.TokenGroup,
  155. ModelName: relayInfo.OriginModelName,
  156. Retry: common.GetPointer(0),
  157. }
  158. for ; retryParam.GetRetry() <= common.RetryTimes; retryParam.IncreaseRetry() {
  159. channel, err := getChannel(c, relayInfo, retryParam)
  160. if err != nil {
  161. logger.LogError(c, err.Error())
  162. newAPIError = err
  163. break
  164. }
  165. addUsedChannel(c, channel.Id)
  166. requestBody, _ := common.GetRequestBody(c)
  167. c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody))
  168. switch relayFormat {
  169. case types.RelayFormatOpenAIRealtime:
  170. newAPIError = relay.WssHelper(c, relayInfo)
  171. case types.RelayFormatClaude:
  172. newAPIError = relay.ClaudeHelper(c, relayInfo)
  173. case types.RelayFormatGemini:
  174. newAPIError = geminiRelayHandler(c, relayInfo)
  175. default:
  176. newAPIError = relayHandler(c, relayInfo)
  177. }
  178. if newAPIError == nil {
  179. return
  180. }
  181. processChannelError(c, *types.NewChannelError(channel.Id, channel.Type, channel.Name, channel.ChannelInfo.IsMultiKey, common.GetContextKeyString(c, constant.ContextKeyChannelKey), channel.GetAutoBan()), newAPIError)
  182. if !shouldRetry(c, newAPIError, common.RetryTimes-retryParam.GetRetry()) {
  183. break
  184. }
  185. }
  186. useChannel := c.GetStringSlice("use_channel")
  187. if len(useChannel) > 1 {
  188. retryLogStr := fmt.Sprintf("重试:%s", strings.Trim(strings.Join(strings.Fields(fmt.Sprint(useChannel)), "->"), "[]"))
  189. logger.LogInfo(c, retryLogStr)
  190. }
  191. }
// upgrader upgrades incoming HTTP requests to WebSocket connections for the
// realtime relay. CheckOrigin accepts every request, so no Origin filtering
// is applied at this layer.
var upgrader = websocket.Upgrader{
	Subprotocols: []string{"realtime"}, // Subprotocols supported during the WS handshake; any protocol a client sends via Sec-WebSocket-Protocol must be declared here. TODO add other protocol
	CheckOrigin: func(r *http.Request) bool {
		return true // allow cross-origin requests
	},
}
  198. func addUsedChannel(c *gin.Context, channelId int) {
  199. useChannel := c.GetStringSlice("use_channel")
  200. useChannel = append(useChannel, fmt.Sprintf("%d", channelId))
  201. c.Set("use_channel", useChannel)
  202. }
  203. func fastTokenCountMetaForPricing(request dto.Request) *types.TokenCountMeta {
  204. if request == nil {
  205. return &types.TokenCountMeta{}
  206. }
  207. meta := &types.TokenCountMeta{
  208. TokenType: types.TokenTypeTokenizer,
  209. }
  210. switch r := request.(type) {
  211. case *dto.GeneralOpenAIRequest:
  212. if r.MaxCompletionTokens > r.MaxTokens {
  213. meta.MaxTokens = int(r.MaxCompletionTokens)
  214. } else {
  215. meta.MaxTokens = int(r.MaxTokens)
  216. }
  217. case *dto.OpenAIResponsesRequest:
  218. meta.MaxTokens = int(r.MaxOutputTokens)
  219. case *dto.ClaudeRequest:
  220. meta.MaxTokens = int(r.MaxTokens)
  221. case *dto.ImageRequest:
  222. // Pricing for image requests depends on ImagePriceRatio; safe to compute even when CountToken is disabled.
  223. return r.GetTokenCountMeta()
  224. default:
  225. // Best-effort: leave CombineText empty to avoid large allocations.
  226. }
  227. return meta
  228. }
  229. func getChannel(c *gin.Context, info *relaycommon.RelayInfo, retryParam *service.RetryParam) (*model.Channel, *types.NewAPIError) {
  230. if info.ChannelMeta == nil {
  231. autoBan := c.GetBool("auto_ban")
  232. autoBanInt := 1
  233. if !autoBan {
  234. autoBanInt = 0
  235. }
  236. return &model.Channel{
  237. Id: c.GetInt("channel_id"),
  238. Type: c.GetInt("channel_type"),
  239. Name: c.GetString("channel_name"),
  240. AutoBan: &autoBanInt,
  241. }, nil
  242. }
  243. channel, selectGroup, err := service.CacheGetRandomSatisfiedChannel(retryParam)
  244. info.PriceData.GroupRatioInfo = helper.HandleGroupRatio(c, info)
  245. if err != nil {
  246. return nil, types.NewError(fmt.Errorf("获取分组 %s 下模型 %s 的可用渠道失败(retry): %s", selectGroup, info.OriginModelName, err.Error()), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
  247. }
  248. if channel == nil {
  249. return nil, types.NewError(fmt.Errorf("分组 %s 下模型 %s 的可用渠道不存在(retry)", selectGroup, info.OriginModelName), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
  250. }
  251. newAPIError := middleware.SetupContextForSelectedChannel(c, channel, info.OriginModelName)
  252. if newAPIError != nil {
  253. return nil, newAPIError
  254. }
  255. return channel, nil
  256. }
  257. func shouldRetry(c *gin.Context, openaiErr *types.NewAPIError, retryTimes int) bool {
  258. if openaiErr == nil {
  259. return false
  260. }
  261. if types.IsChannelError(openaiErr) {
  262. return true
  263. }
  264. if types.IsSkipRetryError(openaiErr) {
  265. return false
  266. }
  267. if retryTimes <= 0 {
  268. return false
  269. }
  270. if _, ok := c.Get("specific_channel_id"); ok {
  271. return false
  272. }
  273. if openaiErr.StatusCode == http.StatusTooManyRequests {
  274. return true
  275. }
  276. if openaiErr.StatusCode == 307 {
  277. return true
  278. }
  279. if openaiErr.StatusCode/100 == 5 {
  280. // 超时不重试
  281. if openaiErr.StatusCode == 504 || openaiErr.StatusCode == 524 {
  282. return false
  283. }
  284. return true
  285. }
  286. if openaiErr.StatusCode == http.StatusBadRequest {
  287. return false
  288. }
  289. if openaiErr.StatusCode == 408 {
  290. // azure处理超时不重试
  291. return false
  292. }
  293. if openaiErr.StatusCode/100 == 2 {
  294. return false
  295. }
  296. return true
  297. }
  298. func processChannelError(c *gin.Context, channelError types.ChannelError, err *types.NewAPIError) {
  299. logger.LogError(c, fmt.Sprintf("channel error (channel #%d, status code: %d): %s", channelError.ChannelId, err.StatusCode, err.Error()))
  300. // 不要使用context获取渠道信息,异步处理时可能会出现渠道信息不一致的情况
  301. // do not use context to get channel info, there may be inconsistent channel info when processing asynchronously
  302. if service.ShouldDisableChannel(channelError.ChannelType, err) && channelError.AutoBan {
  303. gopool.Go(func() {
  304. service.DisableChannel(channelError, err.Error())
  305. })
  306. }
  307. if constant.ErrorLogEnabled && types.IsRecordErrorLog(err) {
  308. // 保存错误日志到mysql中
  309. userId := c.GetInt("id")
  310. tokenName := c.GetString("token_name")
  311. modelName := c.GetString("original_model")
  312. tokenId := c.GetInt("token_id")
  313. userGroup := c.GetString("group")
  314. channelId := c.GetInt("channel_id")
  315. other := make(map[string]interface{})
  316. if c.Request != nil && c.Request.URL != nil {
  317. other["request_path"] = c.Request.URL.Path
  318. }
  319. other["error_type"] = err.GetErrorType()
  320. other["error_code"] = err.GetErrorCode()
  321. other["status_code"] = err.StatusCode
  322. other["channel_id"] = channelId
  323. other["channel_name"] = c.GetString("channel_name")
  324. other["channel_type"] = c.GetInt("channel_type")
  325. adminInfo := make(map[string]interface{})
  326. adminInfo["use_channel"] = c.GetStringSlice("use_channel")
  327. isMultiKey := common.GetContextKeyBool(c, constant.ContextKeyChannelIsMultiKey)
  328. if isMultiKey {
  329. adminInfo["is_multi_key"] = true
  330. adminInfo["multi_key_index"] = common.GetContextKeyInt(c, constant.ContextKeyChannelMultiKeyIndex)
  331. }
  332. other["admin_info"] = adminInfo
  333. model.RecordErrorLog(c, userId, channelId, modelName, tokenName, err.MaskSensitiveError(), tokenId, 0, false, userGroup, other)
  334. }
  335. }
  336. func RelayMidjourney(c *gin.Context) {
  337. relayInfo, err := relaycommon.GenRelayInfo(c, types.RelayFormatMjProxy, nil, nil)
  338. if err != nil {
  339. c.JSON(http.StatusInternalServerError, gin.H{
  340. "description": fmt.Sprintf("failed to generate relay info: %s", err.Error()),
  341. "type": "upstream_error",
  342. "code": 4,
  343. })
  344. return
  345. }
  346. var mjErr *dto.MidjourneyResponse
  347. switch relayInfo.RelayMode {
  348. case relayconstant.RelayModeMidjourneyNotify:
  349. mjErr = relay.RelayMidjourneyNotify(c)
  350. case relayconstant.RelayModeMidjourneyTaskFetch, relayconstant.RelayModeMidjourneyTaskFetchByCondition:
  351. mjErr = relay.RelayMidjourneyTask(c, relayInfo.RelayMode)
  352. case relayconstant.RelayModeMidjourneyTaskImageSeed:
  353. mjErr = relay.RelayMidjourneyTaskImageSeed(c)
  354. case relayconstant.RelayModeSwapFace:
  355. mjErr = relay.RelaySwapFace(c, relayInfo)
  356. default:
  357. mjErr = relay.RelayMidjourneySubmit(c, relayInfo)
  358. }
  359. //err = relayMidjourneySubmit(c, relayMode)
  360. log.Println(mjErr)
  361. if mjErr != nil {
  362. statusCode := http.StatusBadRequest
  363. if mjErr.Code == 30 {
  364. mjErr.Result = "当前分组负载已饱和,请稍后再试,或升级账户以提升服务质量。"
  365. statusCode = http.StatusTooManyRequests
  366. }
  367. c.JSON(statusCode, gin.H{
  368. "description": fmt.Sprintf("%s %s", mjErr.Description, mjErr.Result),
  369. "type": "upstream_error",
  370. "code": mjErr.Code,
  371. })
  372. channelId := c.GetInt("channel_id")
  373. logger.LogError(c, fmt.Sprintf("relay error (channel #%d, status code %d): %s", channelId, statusCode, fmt.Sprintf("%s %s", mjErr.Description, mjErr.Result)))
  374. }
  375. }
  376. func RelayNotImplemented(c *gin.Context) {
  377. err := types.OpenAIError{
  378. Message: "API not implemented",
  379. Type: "new_api_error",
  380. Param: "",
  381. Code: "api_not_implemented",
  382. }
  383. c.JSON(http.StatusNotImplemented, gin.H{
  384. "error": err,
  385. })
  386. }
  387. func RelayNotFound(c *gin.Context) {
  388. err := types.OpenAIError{
  389. Message: fmt.Sprintf("Invalid URL (%s %s)", c.Request.Method, c.Request.URL.Path),
  390. Type: "invalid_request_error",
  391. Param: "",
  392. Code: "",
  393. }
  394. c.JSON(http.StatusNotFound, gin.H{
  395. "error": err,
  396. })
  397. }
  398. func RelayTask(c *gin.Context) {
  399. retryTimes := common.RetryTimes
  400. channelId := c.GetInt("channel_id")
  401. c.Set("use_channel", []string{fmt.Sprintf("%d", channelId)})
  402. relayInfo, err := relaycommon.GenRelayInfo(c, types.RelayFormatTask, nil, nil)
  403. if err != nil {
  404. return
  405. }
  406. taskErr := taskRelayHandler(c, relayInfo)
  407. if taskErr == nil {
  408. retryTimes = 0
  409. }
  410. retryParam := &service.RetryParam{
  411. Ctx: c,
  412. TokenGroup: relayInfo.TokenGroup,
  413. ModelName: relayInfo.OriginModelName,
  414. Retry: common.GetPointer(0),
  415. }
  416. for ; shouldRetryTaskRelay(c, channelId, taskErr, retryTimes) && retryParam.GetRetry() < retryTimes; retryParam.IncreaseRetry() {
  417. channel, newAPIError := getChannel(c, relayInfo, retryParam)
  418. if newAPIError != nil {
  419. logger.LogError(c, fmt.Sprintf("CacheGetRandomSatisfiedChannel failed: %s", newAPIError.Error()))
  420. taskErr = service.TaskErrorWrapperLocal(newAPIError.Err, "get_channel_failed", http.StatusInternalServerError)
  421. break
  422. }
  423. channelId = channel.Id
  424. useChannel := c.GetStringSlice("use_channel")
  425. useChannel = append(useChannel, fmt.Sprintf("%d", channelId))
  426. c.Set("use_channel", useChannel)
  427. logger.LogInfo(c, fmt.Sprintf("using channel #%d to retry (remain times %d)", channel.Id, retryParam.GetRetry()))
  428. //middleware.SetupContextForSelectedChannel(c, channel, originalModel)
  429. requestBody, _ := common.GetRequestBody(c)
  430. c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody))
  431. taskErr = taskRelayHandler(c, relayInfo)
  432. }
  433. useChannel := c.GetStringSlice("use_channel")
  434. if len(useChannel) > 1 {
  435. retryLogStr := fmt.Sprintf("重试:%s", strings.Trim(strings.Join(strings.Fields(fmt.Sprint(useChannel)), "->"), "[]"))
  436. logger.LogInfo(c, retryLogStr)
  437. }
  438. if taskErr != nil {
  439. if taskErr.StatusCode == http.StatusTooManyRequests {
  440. taskErr.Message = "当前分组上游负载已饱和,请稍后再试"
  441. }
  442. c.JSON(taskErr.StatusCode, taskErr)
  443. }
  444. }
  445. func taskRelayHandler(c *gin.Context, relayInfo *relaycommon.RelayInfo) *dto.TaskError {
  446. var err *dto.TaskError
  447. switch relayInfo.RelayMode {
  448. case relayconstant.RelayModeSunoFetch, relayconstant.RelayModeSunoFetchByID, relayconstant.RelayModeVideoFetchByID:
  449. err = relay.RelayTaskFetch(c, relayInfo.RelayMode)
  450. default:
  451. err = relay.RelayTaskSubmit(c, relayInfo)
  452. }
  453. return err
  454. }
  455. func shouldRetryTaskRelay(c *gin.Context, channelId int, taskErr *dto.TaskError, retryTimes int) bool {
  456. if taskErr == nil {
  457. return false
  458. }
  459. if retryTimes <= 0 {
  460. return false
  461. }
  462. if _, ok := c.Get("specific_channel_id"); ok {
  463. return false
  464. }
  465. if taskErr.StatusCode == http.StatusTooManyRequests {
  466. return true
  467. }
  468. if taskErr.StatusCode == 307 {
  469. return true
  470. }
  471. if taskErr.StatusCode/100 == 5 {
  472. // 超时不重试
  473. if taskErr.StatusCode == 504 || taskErr.StatusCode == 524 {
  474. return false
  475. }
  476. return true
  477. }
  478. if taskErr.StatusCode == http.StatusBadRequest {
  479. return false
  480. }
  481. if taskErr.StatusCode == 408 {
  482. // azure处理超时不重试
  483. return false
  484. }
  485. if taskErr.LocalError {
  486. return false
  487. }
  488. if taskErr.StatusCode/100 == 2 {
  489. return false
  490. }
  491. return true
  492. }