package helper import ( "fmt" "strings" "github.com/QuantumNous/new-api/common" "github.com/QuantumNous/new-api/logger" "github.com/QuantumNous/new-api/model" "github.com/QuantumNous/new-api/pkg/billingexpr" relaycommon "github.com/QuantumNous/new-api/relay/common" "github.com/QuantumNous/new-api/setting/billing_setting" "github.com/QuantumNous/new-api/setting/operation_setting" "github.com/QuantumNous/new-api/setting/ratio_setting" "github.com/QuantumNous/new-api/types" "github.com/gin-gonic/gin" ) func modelPriceNotConfiguredError(modelName string, userId int) error { if model.IsAdmin(userId) { return fmt.Errorf( "模型 %s 的价格未配置。请前往「系统设置 → 运营设置」开启自用模式,或在「系统设置 → 分组与模型定价设置」中为该模型配置价格;"+ "Model %s price not configured. Go to System Settings → Operation Settings to enable self-use mode, or configure the model price in System Settings → Group & Model Pricing.", modelName, modelName, ) } return fmt.Errorf( "模型 %s 的价格尚未由管理员配置,暂时无法使用,请联系站点管理员开启该模型;"+ "Model %s has not been priced by the administrator yet. Please contact the site administrator to enable this model.", modelName, modelName, ) } // https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration const claudeCacheCreation1hMultiplier = 6 / 3.75 // HandleGroupRatio checks for "auto_group" in the context and updates the group ratio and relayInfo.UsingGroup if present func HandleGroupRatio(ctx *gin.Context, relayInfo *relaycommon.RelayInfo) types.GroupRatioInfo { groupRatioInfo := types.GroupRatioInfo{ GroupRatio: 1.0, // default ratio GroupSpecialRatio: -1, } // check auto group autoGroup, exists := ctx.Get("auto_group") if exists { logger.LogDebug(ctx, fmt.Sprintf("final group: %s", autoGroup)) relayInfo.UsingGroup = autoGroup.(string) } // check user group special ratio userGroupRatio, ok := ratio_setting.GetGroupGroupRatio(relayInfo.UserGroup, relayInfo.UsingGroup) if ok { // user group special ratio groupRatioInfo.GroupSpecialRatio = userGroupRatio groupRatioInfo.GroupRatio = userGroupRatio groupRatioInfo.HasSpecialRatio = true } else { // normal group ratio groupRatioInfo.GroupRatio = ratio_setting.GetGroupRatio(relayInfo.UsingGroup) } return groupRatioInfo } func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, meta *types.TokenCountMeta) (types.PriceData, error) { modelPrice, usePrice := ratio_setting.GetModelPrice(info.OriginModelName, false) groupRatioInfo := HandleGroupRatio(c, info) // Check if this model uses tiered_expr billing if billing_setting.GetBillingMode(info.OriginModelName) == billing_setting.BillingModeTieredExpr { return modelPriceHelperTiered(c, info, promptTokens, meta, groupRatioInfo) } var preConsumedQuota int var modelRatio float64 var completionRatio float64 var cacheRatio float64 var imageRatio float64 var cacheCreationRatio float64 var cacheCreationRatio5m float64 var cacheCreationRatio1h float64 var audioRatio float64 var audioCompletionRatio float64 var freeModel bool if !usePrice { preConsumedTokens := common.Max(promptTokens, common.PreConsumedQuota) if meta.MaxTokens != 0 { preConsumedTokens += meta.MaxTokens } var success bool var matchName string modelRatio, success, matchName = ratio_setting.GetModelRatio(info.OriginModelName) if !success { acceptUnsetRatio := false if info.UserSetting.AcceptUnsetRatioModel { acceptUnsetRatio = true } if !acceptUnsetRatio { return types.PriceData{}, modelPriceNotConfiguredError(matchName, info.UserId) } } completionRatio = ratio_setting.GetCompletionRatio(info.OriginModelName) cacheRatio, _ = ratio_setting.GetCacheRatio(info.OriginModelName) cacheCreationRatio, _ = ratio_setting.GetCreateCacheRatio(info.OriginModelName) cacheCreationRatio5m = cacheCreationRatio // 固定1h和5min缓存写入价格的比例 cacheCreationRatio1h = cacheCreationRatio * claudeCacheCreation1hMultiplier imageRatio, _ = ratio_setting.GetImageRatio(info.OriginModelName) audioRatio = ratio_setting.GetAudioRatio(info.OriginModelName) audioCompletionRatio = ratio_setting.GetAudioCompletionRatio(info.OriginModelName) ratio := modelRatio * groupRatioInfo.GroupRatio preConsumedQuota = int(float64(preConsumedTokens) * ratio) } else { if meta.ImagePriceRatio != 0 { modelPrice = modelPrice * meta.ImagePriceRatio } preConsumedQuota = int(modelPrice * common.QuotaPerUnit * groupRatioInfo.GroupRatio) } // check if free model pre-consume is disabled if !operation_setting.GetQuotaSetting().EnableFreeModelPreConsume { // if model price or ratio is 0, do not pre-consume quota if groupRatioInfo.GroupRatio == 0 { preConsumedQuota = 0 freeModel = true } else if usePrice { if modelPrice == 0 { preConsumedQuota = 0 freeModel = true } } else { if modelRatio == 0 { preConsumedQuota = 0 freeModel = true } } } priceData := types.PriceData{ FreeModel: freeModel, ModelPrice: modelPrice, ModelRatio: modelRatio, CompletionRatio: completionRatio, GroupRatioInfo: groupRatioInfo, UsePrice: usePrice, CacheRatio: cacheRatio, ImageRatio: imageRatio, AudioRatio: audioRatio, AudioCompletionRatio: audioCompletionRatio, CacheCreationRatio: cacheCreationRatio, CacheCreation5mRatio: cacheCreationRatio5m, CacheCreation1hRatio: cacheCreationRatio1h, QuotaToPreConsume: preConsumedQuota, } if common.DebugEnabled { println(fmt.Sprintf("model_price_helper result: %s", priceData.ToSetting())) } info.PriceData = priceData return priceData, nil } // ModelPriceHelperPerCall 按次/按量计费的 PriceHelper (MJ、Task) func ModelPriceHelperPerCall(c *gin.Context, info *relaycommon.RelayInfo) (types.PriceData, error) { groupRatioInfo := HandleGroupRatio(c, info) modelPrice, success := ratio_setting.GetModelPrice(info.OriginModelName, true) usePrice := success var modelRatio float64 if !success { defaultPrice, ok := ratio_setting.GetDefaultModelPriceMap()[info.OriginModelName] if ok { modelPrice = defaultPrice usePrice = true } else { var ratioSuccess bool var matchName string modelRatio, ratioSuccess, matchName = ratio_setting.GetModelRatio(info.OriginModelName) acceptUnsetRatio := false if info.UserSetting.AcceptUnsetRatioModel { acceptUnsetRatio = true } if !ratioSuccess && !acceptUnsetRatio { return types.PriceData{}, modelPriceNotConfiguredError(matchName, info.UserId) } } } var quota int freeModel := false if usePrice { quota = int(modelPrice * common.QuotaPerUnit * groupRatioInfo.GroupRatio) if !operation_setting.GetQuotaSetting().EnableFreeModelPreConsume { if groupRatioInfo.GroupRatio == 0 || modelPrice == 0 { quota = 0 freeModel = true } } } else { // 按量计费:以模型倍率的一半作为预扣额度 quota = int(modelRatio / 2 * common.QuotaPerUnit * groupRatioInfo.GroupRatio) modelPrice = -1 if !operation_setting.GetQuotaSetting().EnableFreeModelPreConsume { if groupRatioInfo.GroupRatio == 0 || modelRatio == 0 { quota = 0 freeModel = true } } } priceData := types.PriceData{ FreeModel: freeModel, ModelPrice: modelPrice, ModelRatio: modelRatio, UsePrice: usePrice, Quota: quota, GroupRatioInfo: groupRatioInfo, } return priceData, nil } func HasModelBillingConfig(modelName string) bool { if _, ok := ratio_setting.GetModelPrice(modelName, false); ok { return true } if _, ok, _ := ratio_setting.GetModelRatio(modelName); ok { return true } if billing_setting.GetBillingMode(modelName) != billing_setting.BillingModeTieredExpr { return false } expr, ok := billing_setting.GetBillingExpr(modelName) return ok && strings.TrimSpace(expr) != "" } func modelPriceHelperTiered(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, meta *types.TokenCountMeta, groupRatioInfo types.GroupRatioInfo) (types.PriceData, error) { exprStr, ok := billing_setting.GetBillingExpr(info.OriginModelName) if !ok { return types.PriceData{}, fmt.Errorf("model %s is configured as tiered_expr but has no billing expression", info.OriginModelName) } estimatedCompletionTokens := 0 if meta.MaxTokens != 0 { estimatedCompletionTokens = meta.MaxTokens } requestInput, err := ResolveIncomingBillingExprRequestInput(c, info) if err != nil { return types.PriceData{}, err } rawCost, trace, err := billingexpr.RunExprWithRequest(exprStr, billingexpr.TokenParams{ P: float64(promptTokens), C: float64(estimatedCompletionTokens), Len: float64(promptTokens), }, requestInput) if err != nil { return types.PriceData{}, fmt.Errorf("model %s tiered expr run failed: %w", info.OriginModelName, err) } // Expression coefficients are $/1M tokens prices; convert to quota the same way per-call billing does. quotaBeforeGroup := rawCost / 1_000_000 * common.QuotaPerUnit preConsumedQuota := billingexpr.QuotaRound(quotaBeforeGroup * groupRatioInfo.GroupRatio) freeModel := false if !operation_setting.GetQuotaSetting().EnableFreeModelPreConsume { if groupRatioInfo.GroupRatio == 0 { preConsumedQuota = 0 freeModel = true } } exprHash := billingexpr.ExprHashString(exprStr) snapshot := &billingexpr.BillingSnapshot{ BillingMode: billing_setting.BillingModeTieredExpr, ModelName: info.OriginModelName, ExprString: exprStr, ExprHash: exprHash, GroupRatio: groupRatioInfo.GroupRatio, EstimatedPromptTokens: promptTokens, EstimatedCompletionTokens: estimatedCompletionTokens, EstimatedQuotaBeforeGroup: quotaBeforeGroup, EstimatedQuotaAfterGroup: preConsumedQuota, EstimatedTier: trace.MatchedTier, QuotaPerUnit: common.QuotaPerUnit, ExprVersion: billingexpr.ExprVersion(exprStr), } info.TieredBillingSnapshot = snapshot info.BillingRequestInput = &requestInput priceData := types.PriceData{ FreeModel: freeModel, GroupRatioInfo: groupRatioInfo, QuotaToPreConsume: preConsumedQuota, } if common.DebugEnabled { println(fmt.Sprintf("model_price_helper_tiered result: model=%s preConsume=%d quotaBeforeGroup=%.2f groupRatio=%.2f tier=%s", info.OriginModelName, preConsumedQuota, quotaBeforeGroup, groupRatioInfo.GroupRatio, trace.MatchedTier)) } info.PriceData = priceData return priceData, nil }