Jelajahi Sumber

feat: collect model performance metrics (#4635)

Calcium-Ion 1 Minggu lalu
induk
melakukan
9acf5fecae

+ 46 - 0
controller/perf_metrics.go

@@ -0,0 +1,46 @@
+package controller
+
+import (
+	"net/http"
+	"strconv"
+
+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
+
+	"github.com/gin-gonic/gin"
+)
+
+func GetPerfMetrics(c *gin.Context) {
+	modelName := c.Query("model")
+	if modelName == "" {
+		c.JSON(http.StatusBadRequest, gin.H{
+			"success": false,
+			"message": "model is required",
+		})
+		return
+	}
+
+	hours := 24
+	if rawHours := c.Query("hours"); rawHours != "" {
+		if parsed, err := strconv.Atoi(rawHours); err == nil {
+			hours = parsed
+		}
+	}
+
+	result, err := perfmetrics.Query(perfmetrics.QueryParams{
+		Model: modelName,
+		Group: c.Query("group"),
+		Hours: hours,
+	})
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{
+			"success": false,
+			"message": err.Error(),
+		})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{
+		"success": true,
+		"data":    result,
+	})
+}

+ 6 - 0
controller/relay.go

@@ -15,6 +15,7 @@ import (
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/middleware"
 	"github.com/QuantumNous/new-api/model"
+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
 	"github.com/QuantumNous/new-api/relay"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	relayconstant "github.com/QuantumNous/new-api/relay/constant"
@@ -239,6 +240,11 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
 		retryLogStr := fmt.Sprintf("重试:%s", strings.Trim(strings.Join(strings.Fields(fmt.Sprint(useChannel)), "->"), "[]"))
 		logger.LogInfo(c, retryLogStr)
 	}
+	if newAPIError != nil {
+		gopool.Go(func() {
+			perfmetrics.RecordRelaySample(relayInfo, false)
+		})
+	}
 }
 
 var upgrader = websocket.Upgrader{

+ 3 - 0
main.go

@@ -19,6 +19,7 @@ import (
 	"github.com/QuantumNous/new-api/middleware"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/oauth"
+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
 	"github.com/QuantumNous/new-api/relay"
 	"github.com/QuantumNous/new-api/router"
 	"github.com/QuantumNous/new-api/service"
@@ -306,6 +307,8 @@ func InitResources() error {
 		return err
 	}
 
+	perfmetrics.Init()
+
 	// 启动系统监控
 	common.StartSystemMonitor()
 

+ 2 - 0
model/main.go

@@ -280,6 +280,7 @@ func migrateDB() error {
 		&SubscriptionPreConsumeRecord{},
 		&CustomOAuthProvider{},
 		&UserOAuthBinding{},
+		&PerfMetric{},
 	)
 	if err != nil {
 		return err
@@ -328,6 +329,7 @@ func migrateDBFast() error {
 		{&SubscriptionPreConsumeRecord{}, "SubscriptionPreConsumeRecord"},
 		{&CustomOAuthProvider{}, "CustomOAuthProvider"},
 		{&UserOAuthBinding{}, "UserOAuthBinding"},
+		{&PerfMetric{}, "PerfMetric"},
 	}
 	// 动态计算migration数量,确保errChan缓冲区足够大
 	errChan := make(chan error, len(migrations))

+ 70 - 0
model/perf_metric.go

@@ -0,0 +1,70 @@
+package model
+
+import (
+	"time"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+)
+
+// PerfMetric stores aggregated relay performance metrics for the model square.
+type PerfMetric struct {
+	Id             int    `json:"id" gorm:"primaryKey"`
+	ModelName      string `json:"model_name" gorm:"size:128;uniqueIndex:idx_perf_model_group_bucket,priority:1"`
+	Group          string `json:"group" gorm:"column:group;size:64;uniqueIndex:idx_perf_model_group_bucket,priority:2"`
+	BucketTs       int64  `json:"bucket_ts" gorm:"uniqueIndex:idx_perf_model_group_bucket,priority:3;index:idx_perf_bucket_ts"`
+	RequestCount   int64  `json:"request_count" gorm:"default:0"`
+	SuccessCount   int64  `json:"success_count" gorm:"default:0"`
+	TotalLatencyMs int64  `json:"total_latency_ms" gorm:"default:0"`
+	TtftSumMs      int64  `json:"ttft_sum_ms" gorm:"default:0"`
+	TtftCount      int64  `json:"ttft_count" gorm:"default:0"`
+}
+
+func (PerfMetric) TableName() string {
+	return "perf_metrics"
+}
+
+func UpsertPerfMetric(metric *PerfMetric) error {
+	if metric == nil || metric.RequestCount == 0 {
+		return nil
+	}
+	return DB.Clauses(clause.OnConflict{
+		Columns: []clause.Column{
+			{Name: "model_name"},
+			{Name: "group"},
+			{Name: "bucket_ts"},
+		},
+		DoUpdates: clause.Assignments(map[string]interface{}{
+			"request_count":    gorm.Expr("request_count + ?", metric.RequestCount),
+			"success_count":    gorm.Expr("success_count + ?", metric.SuccessCount),
+			"total_latency_ms": gorm.Expr("total_latency_ms + ?", metric.TotalLatencyMs),
+			"ttft_sum_ms":      gorm.Expr("ttft_sum_ms + ?", metric.TtftSumMs),
+			"ttft_count":       gorm.Expr("ttft_count + ?", metric.TtftCount),
+		}),
+	}).Create(metric).Error
+}
+
+func GetPerfMetrics(modelName string, group string, startTs int64, endTs int64) ([]PerfMetric, error) {
+	var metrics []PerfMetric
+	query := DB.Model(&PerfMetric{}).
+		Where("model_name = ? AND bucket_ts >= ? AND bucket_ts <= ?", modelName, startTs, endTs)
+	if group != "" {
+		query = query.Where(commonGroupCol+" = ?", group)
+	}
+	err := query.Order("bucket_ts ASC").Find(&metrics).Error
+	return metrics, err
+}
+
+func DeletePerfMetricsBefore(cutoffTs int64) error {
+	if cutoffTs <= 0 {
+		return nil
+	}
+	return DB.Where("bucket_ts < ?", cutoffTs).Delete(&PerfMetric{}).Error
+}
+
+func PerfMetricStartTime(hours int) int64 {
+	if hours <= 0 {
+		hours = 24
+	}
+	return time.Now().Add(-time.Duration(hours) * time.Hour).Unix()
+}

+ 94 - 0
pkg/perf_metrics/flush.go

@@ -0,0 +1,94 @@
+package perfmetrics
+
+import (
+	"fmt"
+	"strconv"
+	"time"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/model"
+	"github.com/QuantumNous/new-api/setting/perf_metrics_setting"
+)
+
// flushLoop periodically persists completed in-memory buckets and applies the
// retention policy. It runs for the lifetime of the process.
// NOTE(review): there is no stop/cancel mechanism for this goroutine; the
// interval is re-read each cycle so config changes take effect without restart.
func flushLoop() {
	for {
		interval := perf_metrics_setting.GetFlushIntervalMinutes()
		time.Sleep(time.Duration(interval) * time.Minute)
		setting := perf_metrics_setting.GetSetting()
		if !setting.Enabled {
			// When disabled, keep sleeping; buckets already in memory remain.
			continue
		}
		flushCompletedBuckets()
		cleanupExpiredMetrics(setting.RetentionDays)
	}
}

// flushCompletedBuckets upserts every hot bucket belonging to an already
// finished time window into the database. The currently active bucket is
// left in memory so it keeps accumulating samples.
func flushCompletedBuckets() {
	currentBucket := bucketStart(time.Now().Unix())
	hotBuckets.Range(func(key, value any) bool {
		k := key.(bucketKey)
		if k.bucketTs >= currentBucket {
			// Still the active window — skip for now.
			return true
		}

		bucket := value.(*atomicBucket)
		// drain atomically swaps the counters to zero; samples recorded
		// concurrently after this point land in the (now empty) bucket and
		// are picked up by a later flush cycle.
		drained := bucket.drain()
		if drained.requestCount == 0 {
			deleteOldEmptyBucket(k, key)
			return true
		}

		err := model.UpsertPerfMetric(&model.PerfMetric{
			ModelName:      k.model,
			Group:          k.group,
			BucketTs:       k.bucketTs,
			RequestCount:   drained.requestCount,
			SuccessCount:   drained.successCount,
			TotalLatencyMs: drained.totalLatencyMs,
			TtftSumMs:      drained.ttftSumMs,
			TtftCount:      drained.ttftCount,
		})
		if err != nil {
			// Put the drained samples back so they are retried next cycle
			// instead of being lost.
			bucket.addCounters(drained)
			common.SysError(fmt.Sprintf("failed to flush perf metric bucket model=%s group=%s bucket=%d: %s", k.model, k.group, k.bucketTs, err.Error()))
			return true
		}

		deleteOldEmptyBucket(k, key)
		return true
	})
}

// deleteOldEmptyBucket evicts a map entry only once its bucket is more than a
// day old; younger entries are kept to limit map churn while they may still
// be merged into queries.
func deleteOldEmptyBucket(k bucketKey, rawKey any) {
	if k.bucketTs < bucketStart(time.Now().Add(-24*time.Hour).Unix()) {
		hotBuckets.Delete(rawKey)
	}
}

// cleanupExpiredMetrics deletes persisted buckets older than the retention
// window; retentionDays <= 0 means "keep forever".
func cleanupExpiredMetrics(retentionDays int) {
	if retentionDays <= 0 {
		return
	}
	cutoff := time.Now().Add(-time.Duration(retentionDays) * 24 * time.Hour).Unix()
	if err := model.DeletePerfMetricsBefore(cutoff); err != nil {
		common.SysError("failed to cleanup expired perf metrics: " + err.Error())
	}
}
+
+func redisCounters(values map[string]string) counters {
+	return counters{
+		requestCount:   parseRedisInt(values["req"]),
+		successCount:   parseRedisInt(values["ok"]),
+		totalLatencyMs: parseRedisInt(values["lat"]),
+		ttftSumMs:      parseRedisInt(values["ttft"]),
+		ttftCount:      parseRedisInt(values["ttft_n"]),
+	}
+}
+
// parseRedisInt parses a decimal counter stored in Redis. Empty strings yield
// 0; parse errors are deliberately ignored so a corrupt field degrades to the
// value ParseInt reports (zero for syntax errors) instead of failing a merge.
func parseRedisInt(value string) int64 {
	if len(value) == 0 {
		return 0
	}
	n, _ := strconv.ParseInt(value, 10, 64)
	return n
}

+ 261 - 0
pkg/perf_metrics/metrics.go

@@ -0,0 +1,261 @@
+package perfmetrics
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"sync"
+	"time"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/model"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/setting/perf_metrics_setting"
+)
+
+var hotBuckets sync.Map
+
+const seriesSchema = "dbcd0a3c01b55203"
+
// Init starts the background flush goroutine. Call once at process startup.
func Init() {
	go flushLoop()
}

// RecordRelaySample records one finished relay request as a metrics sample.
// Latency is measured from info.StartTime to now; time-to-first-token is only
// recorded for streaming requests that actually sent a response.
func RecordRelaySample(info *relaycommon.RelayInfo, success bool) {
	if info == nil {
		return
	}
	now := time.Now()
	// NOTE(review): HasSendResponse presumably becomes true once the first
	// byte is streamed to the client — confirm against relaycommon.
	hasTtft := info.IsStream && info.HasSendResponse()
	ttftMs := int64(0)
	if hasTtft {
		ttftMs = info.FirstResponseTime.Sub(info.StartTime).Milliseconds()
	}
	Record(Sample{
		Model:     info.OriginModelName,
		Group:     info.UsingGroup,
		LatencyMs: now.Sub(info.StartTime).Milliseconds(),
		TtftMs:    ttftMs,
		HasTtft:   hasTtft,
		Success:   success,
	})
}

// Record folds a sample into the in-memory bucket for the current time window
// and mirrors it to Redis when available. No-op when collection is disabled
// or the sample has no model name.
func Record(sample Sample) {
	setting := perf_metrics_setting.GetSetting()
	if !setting.Enabled || sample.Model == "" {
		return
	}
	if sample.Group == "" {
		sample.Group = "default"
	}
	if sample.LatencyMs < 0 {
		// Clock anomalies can produce negative durations; clamp to zero.
		sample.LatencyMs = 0
	}

	key := bucketKey{
		model:    sample.Model,
		group:    sample.Group,
		bucketTs: bucketStart(time.Now().Unix()),
	}
	actual, _ := hotBuckets.LoadOrStore(key, &atomicBucket{})
	actual.(*atomicBucket).add(sample)
	recordRedis(key, sample)
}

// Query returns aggregated metrics for a model over the trailing Hours window
// (default 24h, capped at 30 days), merging persisted DB rows with not-yet-
// flushed in-memory buckets. Double counting is avoided because flushing
// drains buckets atomically.
// NOTE(review): mergeRedisActiveBuckets is never called here, so counters that
// exist only in Redis (e.g. written by another instance before its flush) do
// not appear in results — confirm whether that is intended.
func Query(params QueryParams) (QueryResult, error) {
	if params.Hours <= 0 {
		params.Hours = 24
	}
	if params.Hours > 24*30 {
		params.Hours = 24 * 30
	}
	endTs := time.Now().Unix()
	startTs := endTs - int64(params.Hours)*3600

	merged := map[bucketKey]counters{}
	rows, err := model.GetPerfMetrics(params.Model, params.Group, startTs, endTs)
	if err != nil {
		return QueryResult{}, err
	}
	for _, row := range rows {
		mergeCounters(merged, bucketKey{
			model:    row.ModelName,
			group:    row.Group,
			bucketTs: row.BucketTs,
		}, counters{
			requestCount:   row.RequestCount,
			successCount:   row.SuccessCount,
			totalLatencyMs: row.TotalLatencyMs,
			ttftSumMs:      row.TtftSumMs,
			ttftCount:      row.TtftCount,
		})
	}

	// Overlay live (unflushed) buckets on top of the persisted rows.
	hotBuckets.Range(func(key, value any) bool {
		k := key.(bucketKey)
		if k.model != params.Model || k.bucketTs < startTs || k.bucketTs > endTs {
			return true
		}
		if params.Group != "" && k.group != params.Group {
			return true
		}
		mergeCounters(merged, k, value.(*atomicBucket).snapshot())
		return true
	})

	return buildQueryResult(params.Model, merged), nil
}
+
+func bucketStart(ts int64) int64 {
+	bucketSeconds := perf_metrics_setting.GetBucketSeconds()
+	if bucketSeconds <= 0 {
+		bucketSeconds = 3600
+	}
+	return ts - (ts % bucketSeconds)
+}
+
+func mergeCounters(merged map[bucketKey]counters, key bucketKey, value counters) {
+	if value.requestCount == 0 {
+		return
+	}
+	current := merged[key]
+	current.requestCount += value.requestCount
+	current.successCount += value.successCount
+	current.totalLatencyMs += value.totalLatencyMs
+	current.ttftSumMs += value.ttftSumMs
+	current.ttftCount += value.ttftCount
+	merged[key] = current
+}
+
// buildQueryResult shapes merged (model, group, bucket) counters into the API
// response: one GroupResult per group (sorted by name), each with a
// time-ordered series plus whole-window aggregates.
func buildQueryResult(modelName string, merged map[bucketKey]counters) QueryResult {
	// Re-key by group, then by bucket timestamp.
	groupBuckets := map[string]map[int64]counters{}
	for key, value := range merged {
		if value.requestCount == 0 {
			continue
		}
		if _, ok := groupBuckets[key.group]; !ok {
			groupBuckets[key.group] = map[int64]counters{}
		}
		groupBuckets[key.group][key.bucketTs] = value
	}

	// Sort group names for a deterministic response order.
	groups := make([]string, 0, len(groupBuckets))
	for group := range groupBuckets {
		groups = append(groups, group)
	}
	sort.Strings(groups)

	results := make([]GroupResult, 0, len(groups))
	for _, group := range groups {
		buckets := groupBuckets[group]
		timestamps := make([]int64, 0, len(buckets))
		for ts := range buckets {
			timestamps = append(timestamps, ts)
		}
		sort.Slice(timestamps, func(i, j int) bool {
			return timestamps[i] < timestamps[j]
		})

		// Accumulate window totals while emitting the per-bucket series.
		total := counters{}
		series := make([]BucketPoint, 0, len(timestamps))
		for _, ts := range timestamps {
			value := buckets[ts]
			total.requestCount += value.requestCount
			total.successCount += value.successCount
			total.totalLatencyMs += value.totalLatencyMs
			total.ttftSumMs += value.ttftSumMs
			total.ttftCount += value.ttftCount
			series = append(series, bucketPoint(ts, value))
		}

		results = append(results, GroupResult{
			Group:        group,
			AvgTtftMs:    avg(total.ttftSumMs, total.ttftCount),
			AvgLatencyMs: avg(total.totalLatencyMs, total.requestCount),
			SuccessRate:  successRate(total),
			RequestCount: total.requestCount,
			SuccessCount: total.successCount,
			TtftCount:    total.ttftCount,
			Series:       series,
		})
	}

	return QueryResult{
		ModelName:    modelName,
		SeriesSchema: seriesSchema,
		Groups:       results,
	}
}
+
+func bucketPoint(ts int64, value counters) BucketPoint {
+	return BucketPoint{
+		Ts:           ts,
+		AvgTtftMs:    avg(value.ttftSumMs, value.ttftCount),
+		AvgLatencyMs: avg(value.totalLatencyMs, value.requestCount),
+		SuccessRate:  successRate(value),
+		Count:        value.requestCount,
+		SuccessCount: value.successCount,
+		TtftCount:    value.ttftCount,
+	}
+}
+
// avg returns the integer mean sum/count (truncated toward zero), or 0 when
// count is not positive.
func avg(sum int64, count int64) int64 {
	if count > 0 {
		return sum / count
	}
	return 0
}
+
+func successRate(value counters) float64 {
+	if value.requestCount <= 0 {
+		return 0
+	}
+	return float64(value.successCount) / float64(value.requestCount) * 100
+}
+
// recordRedis mirrors one sample into a per-bucket Redis hash, best-effort
// (pipeline errors are ignored). Each field is a running counter; the key
// expires one hour after the last write.
func recordRedis(key bucketKey, sample Sample) {
	if !common.RedisEnabled || common.RDB == nil {
		return
	}
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	redisKey := redisBucketKey(key)
	pipe := common.RDB.TxPipeline()
	pipe.HIncrBy(ctx, redisKey, "req", 1)
	if sample.Success {
		pipe.HIncrBy(ctx, redisKey, "ok", 1)
	}
	if sample.LatencyMs > 0 {
		pipe.HIncrBy(ctx, redisKey, "lat", sample.LatencyMs)
	}
	if sample.HasTtft && sample.TtftMs >= 0 {
		pipe.HIncrBy(ctx, redisKey, "ttft", sample.TtftMs)
		pipe.HIncrBy(ctx, redisKey, "ttft_n", 1)
	}
	pipe.Expire(ctx, redisKey, time.Hour)
	// Best-effort: a failed pipeline only loses the Redis mirror, not the
	// authoritative in-memory counters.
	_, _ = pipe.Exec(ctx)
}

// mergeRedisActiveBuckets overlays the Redis counters of the currently active
// bucket onto merged. It requires both a model and a group filter.
// NOTE(review): this function is not called anywhere in this change — wire it
// into Query or remove it; as written, the Redis mirror is never read. Also
// note that merging it on the instance that wrote the same samples to its own
// hot bucket would double-count — confirm the intended multi-instance design.
func mergeRedisActiveBuckets(merged map[bucketKey]counters, params QueryParams, startTs int64, endTs int64) {
	if !common.RedisEnabled || common.RDB == nil || params.Model == "" || params.Group == "" {
		return
	}
	active := bucketStart(time.Now().Unix())
	if active < startTs || active > endTs {
		return
	}
	key := bucketKey{model: params.Model, group: params.Group, bucketTs: active}
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	values, err := common.RDB.HGetAll(ctx, redisBucketKey(key)).Result()
	if err != nil || len(values) == 0 {
		return
	}
	mergeCounters(merged, key, redisCounters(values))
}

// redisBucketKey builds the Redis hash key for one (model, group, bucket).
// NOTE(review): model or group values containing ':' make this key ambiguous
// when parsed back — confirm inputs are constrained upstream.
func redisBucketKey(key bucketKey) string {
	return fmt.Sprintf("perf:%s:%s:%d", key.model, key.group, key.bucketTs)
}

+ 124 - 0
pkg/perf_metrics/types.go

@@ -0,0 +1,124 @@
+package perfmetrics
+
+import "sync/atomic"
+
// Store abstracts a metrics sink/source pair (record + query).
// NOTE(review): no implementation of this interface is visible in this change.
type Store interface {
	Record(sample Sample)
	Query(params QueryParams) (QueryResult, error)
}

// Sample is one finished relay request observation.
type Sample struct {
	Model     string
	Group     string
	LatencyMs int64
	TtftMs    int64
	HasTtft   bool
	Success   bool
}

// QueryParams selects the model, optional group, and trailing window in hours.
type QueryParams struct {
	Model string
	Group string
	Hours int
}

// BucketPoint is one time-bucket of a group's series with derived averages.
type BucketPoint struct {
	Ts           int64   `json:"ts"`
	AvgTtftMs    int64   `json:"avg_ttft_ms"`
	AvgLatencyMs int64   `json:"avg_latency_ms"`
	SuccessRate  float64 `json:"success_rate"`
	Count        int64   `json:"count"`
	SuccessCount int64   `json:"success_count"`
	TtftCount    int64   `json:"ttft_count"`
}

// GroupResult aggregates one group's whole-window stats plus its series.
type GroupResult struct {
	Group        string        `json:"group"`
	AvgTtftMs    int64         `json:"avg_ttft_ms"`
	AvgLatencyMs int64         `json:"avg_latency_ms"`
	SuccessRate  float64       `json:"success_rate"`
	RequestCount int64         `json:"request_count"`
	SuccessCount int64         `json:"success_count"`
	TtftCount    int64         `json:"ttft_count"`
	Series       []BucketPoint `json:"series"`
}

// QueryResult is the API payload for one model.
type QueryResult struct {
	ModelName    string        `json:"model_name"`
	SeriesSchema string        `json:"series_schema"`
	Groups       []GroupResult `json:"groups"`
}

// bucketKey identifies one in-memory bucket: (model, group, bucket start).
type bucketKey struct {
	model    string
	group    string
	bucketTs int64
}

// counters is a plain (non-atomic) snapshot of a bucket's totals.
type counters struct {
	requestCount   int64
	successCount   int64
	totalLatencyMs int64
	ttftSumMs      int64
	ttftCount      int64
}

// atomicBucket accumulates samples lock-free; it must not be copied.
type atomicBucket struct {
	requestCount   atomic.Int64
	successCount   atomic.Int64
	totalLatencyMs atomic.Int64
	ttftSumMs      atomic.Int64
	ttftCount      atomic.Int64
}

// add folds one sample into the bucket's counters.
func (b *atomicBucket) add(s Sample) {
	b.requestCount.Add(1)
	if s.Success {
		b.successCount.Add(1)
	}
	if s.LatencyMs > 0 {
		b.totalLatencyMs.Add(s.LatencyMs)
	}
	if !s.HasTtft || s.TtftMs < 0 {
		return
	}
	b.ttftSumMs.Add(s.TtftMs)
	b.ttftCount.Add(1)
}

// snapshot reads the current totals without resetting them.
func (b *atomicBucket) snapshot() counters {
	var c counters
	c.requestCount = b.requestCount.Load()
	c.successCount = b.successCount.Load()
	c.totalLatencyMs = b.totalLatencyMs.Load()
	c.ttftSumMs = b.ttftSumMs.Load()
	c.ttftCount = b.ttftCount.Load()
	return c
}

// drain atomically moves the totals out, leaving the bucket at zero.
func (b *atomicBucket) drain() counters {
	var c counters
	c.requestCount = b.requestCount.Swap(0)
	c.successCount = b.successCount.Swap(0)
	c.totalLatencyMs = b.totalLatencyMs.Swap(0)
	c.ttftSumMs = b.ttftSumMs.Swap(0)
	c.ttftCount = b.ttftCount.Swap(0)
	return c
}

// addCounters adds previously drained totals back into the bucket (used when
// a flush fails). An atomic Add of zero is a no-op, so every field is added
// unconditionally.
func (b *atomicBucket) addCounters(c counters) {
	b.requestCount.Add(c.requestCount)
	b.successCount.Add(c.successCount)
	b.totalLatencyMs.Add(c.totalLatencyMs)
	b.ttftSumMs.Add(c.ttftSumMs)
	b.ttftCount.Add(c.ttftCount)
}

+ 1 - 0
router/api-router.go

@@ -31,6 +31,7 @@ func SetApiRouter(router *gin.Engine) {
 		//apiRouter.GET("/midjourney", controller.GetMidjourney)
 		apiRouter.GET("/home_page_content", controller.GetHomePageContent)
 		apiRouter.GET("/pricing", middleware.TryUserAuth(), controller.GetPricing)
+		apiRouter.GET("/perf-metrics", middleware.TryUserAuth(), controller.GetPerfMetrics)
 		apiRouter.GET("/verification", middleware.EmailVerificationRateLimit(), middleware.TurnstileCheck(), controller.SendEmailVerification)
 		apiRouter.GET("/reset_password", middleware.CriticalRateLimit(), middleware.TurnstileCheck(), controller.SendPasswordResetEmail)
 		apiRouter.POST("/user/reset", middleware.CriticalRateLimit(), controller.ResetPassword)

+ 6 - 2
service/quota.go

@@ -14,6 +14,7 @@ import (
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/pkg/billingexpr"
+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/setting/ratio_setting"
 	"github.com/QuantumNous/new-api/setting/system_setting"
@@ -219,7 +220,7 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
 		// in this case, must be some error happened
 		// we cannot just return, because we may have to return the pre-consumed quota
 		quota = 0
-		logContent += fmt.Sprintf("(可能是上游超时)")
+		logContent += "(可能是上游超时)"
 		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
 			"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
 	} else {
@@ -340,7 +341,7 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, u
 		// in this case, must be some error happened
 		// we cannot just return, because we may have to return the pre-consumed quota
 		quota = 0
-		logContent += fmt.Sprintf("(可能是上游超时)")
+		logContent += "(可能是上游超时)"
 		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
 			"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.OriginModelName, relayInfo.FinalPreConsumedQuota))
 	} else {
@@ -375,6 +376,9 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, u
 		Group:            relayInfo.UsingGroup,
 		Other:            other,
 	})
+	gopool.Go(func() {
+		perfmetrics.RecordRelaySample(relayInfo, true)
+	})
 }
 
 func PreConsumeTokenQuota(relayInfo *relaycommon.RelayInfo, quota int) error {

+ 5 - 0
service/text_quota.go

@@ -11,10 +11,12 @@ import (
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/pkg/billingexpr"
+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/setting/operation_setting"
 	"github.com/QuantumNous/new-api/types"
 
+	"github.com/bytedance/gopkg/util/gopool"
 	"github.com/gin-gonic/gin"
 	"github.com/shopspring/decimal"
 )
@@ -471,4 +473,7 @@ func PostTextConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, us
 		Group:            relayInfo.UsingGroup,
 		Other:            other,
 	})
+	gopool.Go(func() {
+		perfmetrics.RecordRelaySample(relayInfo, true)
+	})
 }

+ 45 - 0
setting/perf_metrics_setting/config.go

@@ -0,0 +1,45 @@
+package perf_metrics_setting
+
+import "github.com/QuantumNous/new-api/setting/config"
+
// PerfMetricsSetting configures relay performance metric collection.
type PerfMetricsSetting struct {
	Enabled       bool   `json:"enabled"`        // master switch for sample collection
	FlushInterval int    `json:"flush_interval"` // minutes between DB flushes (clamped to >= 1)
	BucketTime    string `json:"bucket_time"`    // bucket granularity: "minute", "5min" or "hour"
	RetentionDays int    `json:"retention_days"` // days to keep persisted buckets; 0 disables cleanup
}

// Defaults: enabled, flush every 5 minutes, hourly buckets, keep forever.
var perfMetricsSetting = PerfMetricsSetting{
	Enabled:       true,
	FlushInterval: 5,
	BucketTime:    "hour",
	RetentionDays: 0,
}

func init() {
	// Register with the global config system so admin updates are applied to
	// this struct in place.
	config.GlobalConfig.Register("perf_metrics_setting", &perfMetricsSetting)
}

// GetSetting returns a copy of the current configuration.
// NOTE(review): the underlying struct may be mutated concurrently by the
// config system while this copy is taken — confirm synchronization upstream.
func GetSetting() PerfMetricsSetting {
	return perfMetricsSetting
}
+
+func GetBucketSeconds() int64 {
+	switch perfMetricsSetting.BucketTime {
+	case "minute":
+		return 60
+	case "5min":
+		return 300
+	case "hour":
+		return 3600
+	default:
+		return 3600
+	}
+}
+
+func GetFlushIntervalMinutes() int {
+	if perfMetricsSetting.FlushInterval < 1 {
+		return 1
+	}
+	return perfMetricsSetting.FlushInterval
+}

+ 43 - 0
web/default/src/features/pricing/api.ts

@@ -10,3 +10,46 @@ export async function getPricing(): Promise<PricingData> {
   const res = await api.get('/api/pricing')
   return res.data
 }
+
// One time-bucket of a group's performance series (timestamps are Unix seconds).
export type PerformanceSeriesPoint = {
  ts: number
  avg_ttft_ms: number
  avg_latency_ms: number
  success_rate: number
  count: number
  success_count: number
  ttft_count: number
}

// Whole-window aggregates for one user group, plus its per-bucket series.
export type PerformanceGroup = {
  group: string
  avg_ttft_ms: number
  avg_latency_ms: number
  success_rate: number
  request_count: number
  success_count: number
  ttft_count: number
  series: PerformanceSeriesPoint[]
}

// Envelope returned by GET /api/perf-metrics.
export type PerformanceMetricsData = {
  success: boolean
  message?: string
  data: {
    model_name: string
    series_schema?: string
    groups: PerformanceGroup[]
  }
}

// Fetches aggregated performance metrics for a model over the trailing
// `hours` window (the server validates/clamps the value).
export async function getPerfMetrics(
  modelName: string,
  hours = 24
): Promise<PerformanceMetricsData> {
  // URLSearchParams escapes model names containing special characters.
  const params = new URLSearchParams({
    model: modelName,
    hours: String(hours),
  })
  const res = await api.get(`/api/perf-metrics?${params.toString()}`)
  return res.data
}

+ 7 - 0
web/default/src/features/pricing/components/model-details-charts.tsx

@@ -14,6 +14,13 @@ function formatHourLabel(iso: string): string {
 
 function formatDayLabel(date: string): string {
   const parsed = new Date(date)
+  if (date.includes('T')) {
+    return parsed.toLocaleString(undefined, {
+      month: 'short',
+      day: 'numeric',
+      hour: '2-digit',
+    })
+  }
   return parsed.toLocaleDateString(undefined, {
     month: 'short',
     day: 'numeric',

+ 133 - 110
web/default/src/features/pricing/components/model-details-performance.tsx

@@ -1,8 +1,8 @@
 import { useMemo } from 'react'
+import { useQuery } from '@tanstack/react-query'
 import {
   Activity,
   AlertTriangle,
-  Gauge,
   HeartPulse,
   Timer,
   TrendingUp,
@@ -18,22 +18,14 @@ import {
   TableRow,
 } from '@/components/ui/table'
 import { GroupBadge } from '@/components/group-badge'
+import { getPerfMetrics, type PerformanceGroup } from '../api'
 import {
-  aggregateUptime,
-  buildGroupPerformance,
-  buildLatencyTimeSeries,
-  buildUptimeSeries,
   formatLatency,
-  formatThroughput,
   formatUptimePct,
   type UptimeDayPoint,
 } from '../lib/mock-stats'
 import type { PricingModel } from '../types'
-import {
-  LatencyTrendChart,
-  ThroughputBarChart,
-  UptimeBarChart,
-} from './model-details-charts'
+import { LatencyTrendChart, UptimeBarChart } from './model-details-charts'
 import { UptimeSparkline } from './model-details-uptime-sparkline'
 
 const COMPACT_NUMBER = new Intl.NumberFormat(undefined, {
@@ -74,33 +66,102 @@ function StatCard(props: {
   )
 }
 
+type PerformanceRow = {
+  group: string
+  avg_ttft_ms: number
+  avg_latency_ms: number
+  success_rate: number
+  request_count: number
+}
+
// Flattens every group's series into latency-chart rows, keeping only points
// that actually observed a time-to-first-token sample.
function toLatencySeries(groups: PerformanceGroup[]) {
  return groups.flatMap((group) =>
    group.series
      .filter((point) => point.ttft_count > 0 && point.avg_ttft_ms > 0)
      .map((point) => ({
        // Bucket timestamps are Unix seconds; the chart expects ISO strings.
        timestamp: new Date(point.ts * 1000).toISOString(),
        group: group.group,
        ttft_ms: point.avg_ttft_ms,
      }))
  )
}

// Merges all groups' buckets by timestamp and derives an uptime percentage
// (success/total) for each bucket, sorted chronologically.
function toUptimeSeries(groups: PerformanceGroup[]): UptimeDayPoint[] {
  const byTs = new Map<number, { count: number; success: number }>()
  for (const group of groups) {
    for (const point of group.series) {
      const current = byTs.get(point.ts) ?? { count: 0, success: 0 }
      current.count += point.count
      current.success += point.success_count
      byTs.set(point.ts, current)
    }
  }
  return Array.from(byTs.entries())
    .sort(([a], [b]) => a - b)
    .map(([ts, value]) => {
      const uptime = value.count > 0 ? (value.success / value.count) * 100 : 0
      return {
        date: new Date(ts * 1000).toISOString(),
        uptime_pct: Math.round(uptime * 100) / 100, // two decimal places
        // Any failed request in the bucket is surfaced as one "incident".
        incidents: value.success < value.count ? 1 : 0,
        outage_minutes: 0, // not derivable from these counters
      }
    })
}

// Per-group variant: reuses the server-computed success_rate for each bucket.
function toGroupUptimeSeries(group: PerformanceGroup): UptimeDayPoint[] {
  return group.series.map((point) => ({
    date: new Date(point.ts * 1000).toISOString(),
    uptime_pct: Math.round(point.success_rate * 100) / 100,
    incidents: point.success_count < point.count ? 1 : 0,
    outage_minutes: 0,
  }))
}
+
+function weightedAverage(
+  rows: PerformanceRow[],
+  field: 'avg_ttft_ms' | 'avg_latency_ms'
+): number {
+  let total = 0
+  let count = 0
+  for (const row of rows) {
+    if (row[field] <= 0 || row.request_count <= 0) continue
+    total += row[field] * row.request_count
+    count += row.request_count
+  }
+  return count > 0 ? Math.round(total / count) : 0
+}
+
 export function ModelDetailsPerformance(props: { model: PricingModel }) {
   const { t } = useTranslation()
-  const performances = useMemo(
-    () => buildGroupPerformance(props.model),
-    [props.model]
-  )
-  const latencySeries = useMemo(
-    () => buildLatencyTimeSeries(props.model),
-    [props.model]
-  )
-  const uptimeSeries = useMemo(
-    () => buildUptimeSeries(props.model),
-    [props.model]
-  )
-  const aggregated = useMemo(
-    () => aggregateUptime(uptimeSeries),
-    [uptimeSeries]
+  const metricsQuery = useQuery({
+    queryKey: ['perf-metrics', props.model.model_name],
+    queryFn: () => getPerfMetrics(props.model.model_name, 24),
+    staleTime: 60 * 1000,
+  })
+  const groups = metricsQuery.data?.data.groups ?? []
+  const performances = useMemo<PerformanceRow[]>(
+    () =>
+      groups.map((group) => ({
+        group: group.group,
+        avg_ttft_ms: group.avg_ttft_ms,
+        avg_latency_ms: group.avg_latency_ms,
+        success_rate: group.success_rate,
+        request_count: group.request_count,
+      })),
+    [groups]
   )
+  const latencySeries = useMemo(() => toLatencySeries(groups), [groups])
+  const uptimeSeries = useMemo(() => toUptimeSeries(groups), [groups])
   const uptimeByGroup = useMemo<Record<string, UptimeDayPoint[]>>(() => {
     const map: Record<string, UptimeDayPoint[]> = {}
-    for (const perf of performances) {
-      map[perf.group] = buildUptimeSeries(props.model, perf.group)
+    for (const group of groups) {
+      map[group.group] = toGroupUptimeSeries(group)
     }
     return map
-  }, [performances, props.model])
+  }, [groups])
 
-  if (performances.length === 0) {
+  if (metricsQuery.isLoading || performances.length === 0) {
     return (
       <div className='text-muted-foreground rounded-lg border p-6 text-center text-sm'>
         {t('Performance data is not yet available for this model.')}
@@ -108,18 +169,22 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
     )
   }
 
-  const bestTtft = Math.min(...performances.map((p) => p.ttft_p50_ms))
-  const bestThroughput = Math.max(...performances.map((p) => p.throughput_tps))
-  const totalRequests = performances.reduce(
-    (s, p) => s + p.request_volume_24h,
-    0
-  )
-  const intent =
-    aggregated.uptime_pct >= 99.9
-      ? 'success'
-      : aggregated.uptime_pct >= 99
-        ? 'default'
-        : 'warning'
+  const ttftValues = performances
+    .map((p) => p.avg_ttft_ms)
+    .filter((value) => value > 0)
+  const bestTtft = ttftValues.length > 0 ? Math.min(...ttftValues) : 0
+  const avgLatency = weightedAverage(performances, 'avg_latency_ms')
+  const totalRequests = performances.reduce((s, p) => s + p.request_count, 0)
+  const totalSuccess = groups.reduce((s, p) => s + p.success_count, 0)
+  const successRate =
+    totalRequests > 0 ? (totalSuccess / totalRequests) * 100 : 0
+  const incidentCount = uptimeSeries.reduce((s, p) => s + p.incidents, 0)
+  let intent: 'default' | 'warning' | 'success' = 'warning'
+  if (successRate >= 99.9) {
+    intent = 'success'
+  } else if (successRate >= 99) {
+    intent = 'default'
+  }
 
   const headerCellClass =
     'text-muted-foreground py-2 text-[10px] font-medium tracking-wider uppercase'
@@ -134,21 +199,21 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
           hint={t('Lowest median first-token latency')}
         />
         <StatCard
-          icon={Gauge}
-          label={t('Peak throughput')}
-          value={formatThroughput(bestThroughput)}
+          icon={Timer}
+          label={t('Average latency')}
+          value={formatLatency(avgLatency)}
           hint={t('Across all groups')}
         />
         <StatCard
           icon={HeartPulse}
-          label={t('Uptime (30d)')}
-          value={formatUptimePct(aggregated.uptime_pct)}
+          label={t('Success rate')}
+          value={formatUptimePct(successRate)}
           hint={
-            aggregated.incidents > 0
-              ? t('{{count}} incidents in the last 30 days', {
-                  count: aggregated.incidents,
+            incidentCount > 0
+              ? t('{{count}} incidents in the last 24 hours', {
+                  count: incidentCount,
                 })
-              : t('No incidents in the last 30 days')
+              : t('No incidents in the last 24 hours')
           }
           intent={intent}
         />
@@ -164,9 +229,7 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
         <SectionHeader
           icon={Activity}
           title={t('Per-group performance')}
-          description={t(
-            'TTFT percentiles, throughput, and 30-day uptime by group'
-          )}
+          description={t('Average latency, TTFT, and success rate by group')}
         />
         <div className='overflow-x-auto rounded-lg border'>
           <Table className='text-sm'>
@@ -174,31 +237,24 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
               <TableRow className='hover:bg-transparent'>
                 <TableHead className={headerCellClass}>{t('Group')}</TableHead>
                 <TableHead className={`${headerCellClass} text-right`}>
-                  {t('TTFT P50')}
-                </TableHead>
-                <TableHead className={`${headerCellClass} text-right`}>
-                  {t('TTFT P95')}
-                </TableHead>
-                <TableHead className={`${headerCellClass} text-right`}>
-                  {t('TTFT P99')}
+                  {t('Average TTFT')}
                 </TableHead>
                 <TableHead className={`${headerCellClass} text-right`}>
-                  {t('Throughput')}
+                  {t('Average latency')}
                 </TableHead>
                 <TableHead
                   className={`${headerCellClass} min-w-[160px] text-left`}
                 >
-                  {t('Uptime (30d)')}
+                  {t('Success rate')}
                 </TableHead>
                 <TableHead className={`${headerCellClass} text-right`}>
-                  {t('Requests / 24h')}
+                  {t('Request Count')}
                 </TableHead>
               </TableRow>
             </TableHeader>
             <TableBody>
               {performances.map((perf) => {
-                const isBestTtft = perf.ttft_p50_ms === bestTtft
-                const isBestTput = perf.throughput_tps === bestThroughput
+                const isBestTtft = perf.avg_ttft_ms === bestTtft
                 return (
                   <TableRow key={perf.group}>
                     <TableCell className='py-2.5'>
@@ -210,23 +266,10 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
                         isBestTtft && 'text-emerald-600 dark:text-emerald-400'
                       )}
                     >
-                      {formatLatency(perf.ttft_p50_ms)}
-                    </TableCell>
-                    <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
-                      {formatLatency(perf.ttft_p95_ms)}
+                      {formatLatency(perf.avg_ttft_ms)}
                     </TableCell>
                     <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
-                      {formatLatency(perf.ttft_p99_ms)}
-                    </TableCell>
-                    <TableCell
-                      className={cn(
-                        'py-2.5 text-right font-mono',
-                        isBestTput &&
-                          perf.throughput_tps > 0 &&
-                          'text-emerald-600 dark:text-emerald-400'
-                      )}
-                    >
-                      {formatThroughput(perf.throughput_tps)}
+                      {formatLatency(perf.avg_latency_ms)}
                     </TableCell>
                     <TableCell className='py-2.5'>
                       <UptimeSparkline
@@ -235,7 +278,7 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
                       />
                     </TableCell>
                     <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
-                      {COMPACT_NUMBER.format(perf.request_volume_24h)}
+                      {COMPACT_NUMBER.format(perf.request_count)}
                     </TableCell>
                   </TableRow>
                 )
@@ -249,45 +292,31 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
         <SectionHeader
           icon={Timer}
           title={t('Latency trend (last 24h)')}
-          description={t(
-            'Median time-to-first-token (TTFT) sampled hourly per group'
-          )}
+          description={t('Average time-to-first-token (TTFT) by group')}
         />
         <LatencyTrendChart series={latencySeries} />
       </section>
 
-      {bestThroughput > 0 && (
-        <section>
-          <SectionHeader
-            icon={Gauge}
-            title={t('Throughput by group')}
-            description={t('Average tokens per second sustained per group')}
-          />
-          <ThroughputBarChart rows={performances} />
-        </section>
-      )}
-
       <section>
         <SectionHeader
           icon={HeartPulse}
-          title={t('Uptime (last 30 days)')}
+          title={t('Availability (last 24h)')}
           description={
-            aggregated.incidents > 0
+            incidentCount > 0
               ? t(
-                  'Daily uptime; {{incidents}} incidents totalling {{minutes}} minutes',
+                  'Request success rate; {{incidents}} incident buckets in the last 24 hours',
                   {
-                    incidents: aggregated.incidents,
-                    minutes: aggregated.outage_minutes,
+                    incidents: incidentCount,
                   }
                 )
-              : t('Daily uptime over the last 30 days')
+              : t('Request success rate sampled over the last 24 hours')
           }
           accent={
-            aggregated.incidents > 0 ? (
+            incidentCount > 0 ? (
               <span className='inline-flex items-center gap-1 text-amber-600 dark:text-amber-400'>
                 <AlertTriangle className='size-3.5' />
                 {t('{{count}} incidents', {
-                  count: aggregated.incidents,
+                  count: incidentCount,
                 })}
               </span>
             ) : null
@@ -295,12 +324,6 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
         />
         <UptimeBarChart series={uptimeSeries} />
       </section>
-
-      <p className='text-muted-foreground/60 text-[11px] leading-relaxed'>
-        {t(
-          'Performance metrics shown here are simulated for preview purposes and will be replaced with live observability data once the backend integration is complete.'
-        )}
-      </p>
     </div>
   )
 }

+ 0 - 8
web/default/src/features/pricing/components/model-details.tsx

@@ -41,7 +41,6 @@ import {
   isDynamicPricingModel,
 } from '../lib/dynamic-price'
 import { parseTags } from '../lib/filters'
-import { buildUptimeSeries } from '../lib/mock-stats'
 import {
   getAvailableGroups,
   isTokenBasedModel,
@@ -57,7 +56,6 @@ import { ModelDetailsCapabilities } from './model-details-capabilities'
 import { ModalitiesMatrix } from './model-details-modalities'
 import { ModelDetailsPerformance } from './model-details-performance'
 import { ModelDetailsQuickStats } from './model-details-quick-stats'
-import { UptimeStatusRow } from './model-details-uptime-sparkline'
 
 // ----------------------------------------------------------------------------
 // Local UI helpers
@@ -782,10 +780,6 @@ export function ModelDetailsContent(props: ModelDetailsContentProps) {
   const { t } = useTranslation()
   const showRechargePrice = props.showRechargePrice ?? false
   const metadata = useMemo(() => inferModelMetadata(props.model), [props.model])
-  const uptimeSeries = useMemo(
-    () => buildUptimeSeries(props.model),
-    [props.model]
-  )
 
   const isDynamic =
     props.model.billing_mode === 'tiered_expr' &&
@@ -797,8 +791,6 @@ export function ModelDetailsContent(props: ModelDetailsContentProps) {
 
       <ModelDetailsQuickStats metadata={metadata} />
 
-      <UptimeStatusRow series={uptimeSeries} />
-
       <Tabs defaultValue='overview' className='gap-4'>
         <TabsList className='bg-muted/60 h-auto w-full justify-start gap-1 overflow-x-auto rounded-lg p-1'>
           {TAB_VALUES.map((value) => {

+ 4 - 0
web/default/src/features/system-settings/maintenance/config.ts

@@ -75,6 +75,10 @@ export const DEFAULT_MAINTENANCE_SETTINGS: MaintenanceSettings = {
   'performance_setting.monitor_cpu_threshold': 90,
   'performance_setting.monitor_memory_threshold': 90,
   'performance_setting.monitor_disk_threshold': 95,
+  'perf_metrics_setting.enabled': true,
+  'perf_metrics_setting.flush_interval': 5,
+  'perf_metrics_setting.bucket_time': 'hour',
+  'perf_metrics_setting.retention_days': 0,
 }
 
 const toBoolean = (value: unknown, fallback: boolean): boolean => {

+ 96 - 0
web/default/src/features/system-settings/maintenance/performance-section.tsx

@@ -59,6 +59,10 @@ const perfSchema = z.object({
     .number()
     .min(0)
     .max(100),
+  'perf_metrics_setting.enabled': z.boolean(),
+  'perf_metrics_setting.flush_interval': z.coerce.number().min(1),
+  'perf_metrics_setting.bucket_time': z.enum(['minute', '5min', 'hour']),
+  'perf_metrics_setting.retention_days': z.coerce.number().min(0),
 })
 
 type PerfFormValues = z.infer<typeof perfSchema>
@@ -248,6 +252,7 @@ export function PerformanceSection(props: Props) {
 
   const diskEnabled = form.watch('performance_setting.disk_cache_enabled')
   const monitorEnabled = form.watch('performance_setting.monitor_enabled')
+  const perfMetricsEnabled = form.watch('perf_metrics_setting.enabled')
   const maxCacheSizeMb = form.watch(
     'performance_setting.disk_cache_max_size_mb'
   )
@@ -452,6 +457,97 @@ export function PerformanceSection(props: Props) {
             />
           </div>
 
+          <Separator />
+
+          <div>
+            <h4 className='font-medium'>{t('Model performance metrics')}</h4>
+            <p className='text-muted-foreground mt-1 text-xs'>
+              {t(
+                'Collect relay latency and success-rate metrics for the model square.'
+              )}
+            </p>
+          </div>
+
+          <div className='grid grid-cols-1 gap-4 md:grid-cols-4'>
+            <FormField
+              control={form.control}
+              name='perf_metrics_setting.enabled'
+              render={({ field }) => (
+                <FormItem className='flex items-center gap-2'>
+                  <FormControl>
+                    <Switch
+                      checked={field.value}
+                      onCheckedChange={field.onChange}
+                    />
+                  </FormControl>
+                  <FormLabel>{t('Enable model performance metrics')}</FormLabel>
+                </FormItem>
+              )}
+            />
+            <FormField
+              control={form.control}
+              name='perf_metrics_setting.flush_interval'
+              render={({ field }) => (
+                <FormItem>
+                  <FormLabel>{t('Flush interval (minutes)')}</FormLabel>
+                  <FormControl>
+                    <Input
+                      type='number'
+                      min={1}
+                      {...field}
+                      disabled={!perfMetricsEnabled}
+                    />
+                  </FormControl>
+                </FormItem>
+              )}
+            />
+            <FormField
+              control={form.control}
+              name='perf_metrics_setting.bucket_time'
+              render={({ field }) => (
+                <FormItem>
+                  <FormLabel>{t('Aggregation bucket')}</FormLabel>
+                  <Select
+                    value={field.value}
+                    onValueChange={field.onChange}
+                    disabled={!perfMetricsEnabled}
+                  >
+                    <FormControl>
+                      <SelectTrigger>
+                        <SelectValue />
+                      </SelectTrigger>
+                    </FormControl>
+                    <SelectContent>
+                      <SelectItem value='minute'>{t('1 minute')}</SelectItem>
+                      <SelectItem value='5min'>{t('5 minutes')}</SelectItem>
+                      <SelectItem value='hour'>{t('1 hour')}</SelectItem>
+                    </SelectContent>
+                  </Select>
+                </FormItem>
+              )}
+            />
+            <FormField
+              control={form.control}
+              name='perf_metrics_setting.retention_days'
+              render={({ field }) => (
+                <FormItem>
+                  <FormLabel>{t('Retention days')}</FormLabel>
+                  <FormControl>
+                    <Input
+                      type='number'
+                      min={0}
+                      {...field}
+                      disabled={!perfMetricsEnabled}
+                    />
+                  </FormControl>
+                  <FormDescription>
+                    {t('0 means data is kept permanently')}
+                  </FormDescription>
+                </FormItem>
+              )}
+            />
+          </div>
+
           <Button type='submit' disabled={updateOption.isPending}>
             {updateOption.isPending ? t('Saving...') : t('Save Changes')}
           </Button>

+ 8 - 0
web/default/src/features/system-settings/maintenance/section-registry.tsx

@@ -102,6 +102,14 @@ const MAINTENANCE_SECTIONS = [
             settings['performance_setting.monitor_memory_threshold'] ?? 90,
           'performance_setting.monitor_disk_threshold':
             settings['performance_setting.monitor_disk_threshold'] ?? 95,
+          'perf_metrics_setting.enabled':
+            settings['perf_metrics_setting.enabled'] ?? true,
+          'perf_metrics_setting.flush_interval':
+            settings['perf_metrics_setting.flush_interval'] ?? 5,
+          'perf_metrics_setting.bucket_time':
+            settings['perf_metrics_setting.bucket_time'] ?? 'hour',
+          'perf_metrics_setting.retention_days':
+            settings['perf_metrics_setting.retention_days'] ?? 0,
         }}
       />
     ),

+ 4 - 0
web/default/src/features/system-settings/types.ts

@@ -254,6 +254,10 @@ export type MaintenanceSettings = {
   'performance_setting.monitor_cpu_threshold': number
   'performance_setting.monitor_memory_threshold': number
   'performance_setting.monitor_disk_threshold': number
+  'perf_metrics_setting.enabled': boolean
+  'perf_metrics_setting.flush_interval': number
+  'perf_metrics_setting.bucket_time': 'hour' | 'minute' | '5min'
+  'perf_metrics_setting.retention_days': number
 }
 
 export type RequestLimitsSettings = {

+ 20 - 0
web/default/src/i18n/locales/en.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "{{count}} disabled channel(s) deleted",
     "{{count}} hours ago": "{{count}} hours ago",
     "{{count}} incidents": "{{count}} incidents",
+    "{{count}} incidents in the last 24 hours": "{{count}} incidents in the last 24 hours",
     "{{count}} incidents in the last 30 days": "{{count}} incidents in the last 30 days",
     "{{count}} IP(s)": "{{count}} IP(s)",
     "{{count}} log entries removed.": "{{count}} log entries removed.",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 Your Company. All rights reserved.",
     "+{{count}} more": "+{{count}} more",
     "| Based on": "| Based on",
+    "0 means data is kept permanently": "0 means data is kept permanently",
     "0 means unlimited": "0 means unlimited",
     "1 Day": "1 Day",
     "1 day ago": "1 day ago",
+    "1 hour": "1 hour",
     "1 Hour": "1H",
     "1 hour ago": "1 hour ago",
+    "1 minute": "1 minute",
     "1 minute ago": "1 minute ago",
     "1 Month": "1M",
     "1 month ago": "1 month ago",
@@ -86,6 +90,7 @@
     "30 Days": "30 Days",
     "30 days ago": "30 days ago",
     "30d change": "30d change",
+    "5 minutes": "5 minutes",
     "5-Hour Window": "5-Hour Window",
     "50 / page": "50 / page",
     "7 Days": "7 Days",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "Aggregated traffic by upstream model provider",
     "Aggregated usage metrics and trend charts.": "Aggregated usage metrics and trend charts.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.",
+    "Aggregation bucket": "Aggregation bucket",
     "AGPL v3.0 License": "AGPL v3.0 License",
     "AI model testing environment": "AI model testing environment",
     "AI models": "AI models",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "Automatically selects the best available group with circuit breaker mechanism",
     "Automatically sync model list when upstream changes are detected": "Automatically sync model list when upstream changes are detected",
     "Automatically test channels and notify users when limits are hit": "Automatically test channels and notify users when limits are hit",
+    "Availability (last 24h)": "Availability (last 24h)",
     "Available": "Available",
     "Available disk space": "Available disk space",
     "Available Models": "Available Models",
     "Available Rewards": "Available Rewards",
+    "Average latency": "Average latency",
+    "Average latency, TTFT, and success rate by group": "Average latency, TTFT, and success rate by group",
     "Average RPM": "Average RPM",
+    "Average time-to-first-token (TTFT) by group": "Average time-to-first-token (TTFT) by group",
     "Average tokens per second sustained per group": "Average tokens per second sustained per group",
     "Average TPM": "Average TPM",
+    "Average TTFT": "Average TTFT",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude Compat",
     "AWS Key Format": "AWS Key Format",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Collapse": "Collapse",
     "Collapse All": "Collapse All",
+    "Collect relay latency and success-rate metrics for the model square.": "Collect relay latency and success-rate metrics for the model square.",
     "Color": "Color",
     "Color is required": "Color is required",
     "Color preset": "Color preset",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "Enable io.net deployments",
     "Enable io.net model deployment service in console": "Enable io.net model deployment service in console",
     "Enable LinuxDO OAuth": "Enable LinuxDO OAuth",
+    "Enable model performance metrics": "Enable model performance metrics",
     "Enable OIDC": "Enable OIDC",
     "Enable or disable this channel": "Enable or disable this channel",
     "Enable or disable this model": "Enable or disable this model",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "Fixed price (USD)",
     "Floating": "Floating",
     "FluentRead extension not detected. Please ensure it is installed and active.": "FluentRead extension not detected. Please ensure it is installed and active.",
+    "Flush interval (minutes)": "Flush interval (minutes)",
     "Follow the guided steps to prepare your workspace before the first login.": "Follow the guided steps to prepare your workspace before the first login.",
     "Footer": "Footer",
     "Footer text displayed at the bottom of pages": "Footer text displayed at the bottom of pages",
@@ -2221,6 +2235,7 @@
     "Model name is required": "Model name is required",
     "Model names copied to clipboard": "Model names copied to clipboard",
     "Model not found": "Model not found",
+    "Model performance metrics": "Model performance metrics",
     "Model Price": "Model Price",
     "Model Price Not Configured": "Model Price Not Configured",
     "Model Pricing": "Model Pricing",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "No groups match your search",
     "No header overrides configured.": "No header overrides configured.",
     "No history data available": "No history data available",
+    "No incidents in the last 24 hours": "No incidents in the last 24 hours",
     "No incidents in the last 30 days": "No incidents in the last 30 days",
     "No Inviter": "No Inviter",
     "No keys found": "No keys found",
@@ -3106,6 +3122,8 @@
     "Request Model:": "Request Model:",
     "Request overrides, routing behavior, and upstream model automation": "Request overrides, routing behavior, and upstream model automation",
     "Request rule pricing": "Request rule pricing",
+    "Request success rate sampled over the last 24 hours": "Request success rate sampled over the last 24 hours",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Request success rate; {{incidents}} incident buckets in the last 24 hours",
     "Request timed out, please refresh and restart GitHub login": "Request timed out, please refresh and restart GitHub login",
     "Request-based": "Request-based",
     "Requests (24h)": "Requests (24h)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "Restrict user model request frequency (may impact high concurrency performance)",
     "Retain last N days": "Retain last N days",
     "Retain last N files": "Retain last N files",
+    "Retention days": "Retention days",
     "Retry": "Retry",
     "Retry Chain": "Retry Chain",
     "Retry Suggestion": "Retry Suggestion",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "Subscription Plans",
     "Subtract": "Subtract",
     "Success": "Success",
+    "Success rate": "Success rate",
     "Successfully created {{count}} API Key(s)": "Successfully created {{count}} API Key(s)",
     "Successfully created {{count}} redemption codes": "Successfully created {{count}} redemption codes",
     "Successfully deleted {{count}} API key(s)": "Successfully deleted {{count}} API key(s)",

+ 20 - 0
web/default/src/i18n/locales/fr.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "{{count}} canal(canaux) désactivé(s) supprimé(s)",
     "{{count}} hours ago": "il y a {{count}} heures",
     "{{count}} incidents": "{{count}} incidents",
+    "{{count}} incidents in the last 24 hours": "{{count}} incidents au cours des dernières 24 heures",
     "{{count}} incidents in the last 30 days": "{{count}} incidents au cours des 30 derniers jours",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} log entries removed.": "{{count}} entrées de journal supprimées.",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 Votre entreprise. Tous droits réservés.",
     "+{{count}} more": "+{{count}} de plus",
     "| Based on": "| Basé sur",
+    "0 means data is kept permanently": "0 signifie que les données sont conservées indéfiniment",
     "0 means unlimited": "0 signifie illimité",
     "1 Day": "1 jour",
     "1 day ago": "Il y a 1 jour",
+    "1 hour": "1 heure",
     "1 Hour": "1H",
     "1 hour ago": "Il y a 1 heure",
+    "1 minute": "1 minute",
     "1 minute ago": "Il y a 1 minute",
     "1 Month": "1M",
     "1 month ago": "Il y a 1 mois",
@@ -86,6 +90,7 @@
     "30 Days": "30 jours",
     "30 days ago": "Il y a 30 jours",
     "30d change": "Variation 30 j",
+    "5 minutes": "5 minutes",
     "5-Hour Window": "Fenêtre de 5 heures",
     "50 / page": "50 / page",
     "7 Days": "7 jours",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "Trafic agrégé par fournisseur de modèle amont",
     "Aggregated usage metrics and trend charts.": "Métriques d'utilisation agrégées et graphiques de tendances.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "agrège plus de 50 fournisseurs IA derrière une API unifiée. Gérez l'accès, suivez les coûts et évoluez sans effort.",
+    "Aggregation bucket": "Fenêtre d’agrégation",
     "AGPL v3.0 License": "Licence AGPL v3.0",
     "AI model testing environment": "Environnement de test de modèle IA",
     "AI models": "Modèles d'IA",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "Sélectionne automatiquement le meilleur groupe disponible avec un mécanisme de disjoncteur de circuit",
     "Automatically sync model list when upstream changes are detected": "Synchroniser automatiquement la liste des modèles lorsque des changements en amont sont détectés",
     "Automatically test channels and notify users when limits are hit": "Tester automatiquement les canaux et notifier les utilisateurs lorsque les limites sont atteintes",
+    "Availability (last 24h)": "Disponibilité (dernières 24 h)",
     "Available": "Disponible",
     "Available disk space": "Espace disque disponible",
     "Available Models": "Modèles disponibles",
     "Available Rewards": "Récompenses disponibles",
+    "Average latency": "Latence moyenne",
+    "Average latency, TTFT, and success rate by group": "Latence moyenne, TTFT et taux de réussite par groupe",
     "Average RPM": "RPM moyen",
+    "Average time-to-first-token (TTFT) by group": "Temps moyen jusqu’au premier token (TTFT) par groupe",
     "Average tokens per second sustained per group": "Tokens par seconde soutenus en moyenne par groupe",
     "Average TPM": "TPM moyen",
+    "Average TTFT": "TTFT moyen",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude Compat",
     "AWS Key Format": "Format de clé AWS",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Collapse": "Réduire",
     "Collapse All": "Tout réduire",
+    "Collect relay latency and success-rate metrics for the model square.": "Collecte les métriques de latence Relay et de taux de réussite pour la place des modèles.",
     "Color": "Couleur",
     "Color is required": "La couleur est requise",
     "Color preset": "Préréglage de couleur",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "Activer les déploiements io.net",
     "Enable io.net model deployment service in console": "Activer le service de déploiement de modèles io.net dans la console",
     "Enable LinuxDO OAuth": "Activer LinuxDO OAuth",
+    "Enable model performance metrics": "Activer les indicateurs de performance des modèles",
     "Enable OIDC": "Activer OIDC",
     "Enable or disable this channel": "Activer ou désactiver ce canal",
     "Enable or disable this model": "Activer ou désactiver ce modèle",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "Prix fixe (USD)",
     "Floating": "Flottant",
     "FluentRead extension not detected. Please ensure it is installed and active.": "Extension FluentRead non détectée. Veuillez vous assurer qu'elle est installée et activée.",
+    "Flush interval (minutes)": "Intervalle d’écriture (minutes)",
     "Follow the guided steps to prepare your workspace before the first login.": "Suivez les étapes guidées pour préparer votre espace de travail avant la première connexion.",
     "Footer": "Pied de page",
     "Footer text displayed at the bottom of pages": "Texte de pied de page affiché en bas des pages",
@@ -2221,6 +2235,7 @@
     "Model name is required": "Le nom du modèle est requis",
     "Model names copied to clipboard": "Noms des modèles copiés dans le presse-papiers",
     "Model not found": "Modèle introuvable",
+    "Model performance metrics": "Indicateurs de performance des modèles",
     "Model Price": "Prix du modèle",
     "Model Price Not Configured": "Prix du modèle non configuré",
     "Model Pricing": "Tarification des modèles",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "Aucun groupe ne correspond à votre recherche",
     "No header overrides configured.": "Aucune surcharge d'en-têtes configurée.",
     "No history data available": "Aucune donnée historique disponible",
+    "No incidents in the last 24 hours": "Aucun incident au cours des dernières 24 heures",
     "No incidents in the last 30 days": "Aucun incident sur les 30 derniers jours",
     "No Inviter": "Pas d'inviteur",
     "No keys found": "Aucune clé trouvée",
@@ -3106,6 +3122,8 @@
     "Request Model:": "Modèle demandé :",
     "Request overrides, routing behavior, and upstream model automation": "Surcharges de requête, comportement de routage et automatisation des modèles amont",
     "Request rule pricing": "Règles de tarification de requête",
+    "Request success rate sampled over the last 24 hours": "Taux de réussite des requêtes échantillonné sur les dernières 24 heures",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Taux de réussite des requêtes ; {{incidents}} créneaux avec incident sur les dernières 24 heures",
     "Request timed out, please refresh and restart GitHub login": "Délai dépassé, veuillez actualiser la page puis relancer la connexion GitHub",
     "Request-based": "Selon la requête",
     "Requests (24h)": "Requêtes (24 h)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "Restreindre la fréquence des requêtes du modèle utilisateur (peut impacter les performances en cas de forte concurrence)",
     "Retain last N days": "Conserver les N derniers jours",
     "Retain last N files": "Conserver les N derniers fichiers",
+    "Retention days": "Jours de rétention",
     "Retry": "Réessayer",
     "Retry Chain": "Chaîne de tentatives",
     "Retry Suggestion": "Suggestion de relance",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "Plans d'abonnement",
     "Subtract": "Soustraire",
     "Success": "Succès",
+    "Success rate": "Taux de réussite",
     "Successfully created {{count}} API Key(s)": "{{count}} clé(s) API créée(s) avec succès",
     "Successfully created {{count}} redemption codes": "{{count}} codes de réduction créés avec succès",
     "Successfully deleted {{count}} API key(s)": "{{count}} clé(s) API supprimée(s) avec succès",

+ 20 - 0
web/default/src/i18n/locales/ja.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "{{count}} 個の無効チャネルを削除しました",
     "{{count}} hours ago": "{{count}} 時間前",
     "{{count}} incidents": "{{count}} 件のインシデント",
+    "{{count}} incidents in the last 24 hours": "過去 24 時間に {{count}} 件のインシデント",
     "{{count}} incidents in the last 30 days": "過去 30 日間で {{count}} 件のインシデント",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} log entries removed.": "{{count}} 件のログエントリを削除しました。",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 Your Company. 全著作権所有。",
     "+{{count}} more": "他 {{count}} 件",
     "| Based on": "| に基づく",
+    "0 means data is kept permanently": "0 はデータを永続的に保持することを意味します",
     "0 means unlimited": "0は無制限を意味します",
     "1 Day": "1日",
     "1 day ago": "1日前",
+    "1 hour": "1 時間",
     "1 Hour": "1時間",
     "1 hour ago": "1時間前",
+    "1 minute": "1 分",
     "1 minute ago": "1分前",
     "1 Month": "1ヶ月",
     "1 month ago": "1ヶ月前",
@@ -86,6 +90,7 @@
     "30 Days": "30日",
     "30 days ago": "30日前",
     "30d change": "30日変化",
+    "5 minutes": "5 分",
     "5-Hour Window": "5時間ウィンドウ",
     "50 / page": "50 / ページ",
     "7 Days": "7日",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "上流モデルプロバイダー別の集計トラフィック",
     "Aggregated usage metrics and trend charts.": "集計された使用量メトリクスとトレンドチャート。",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "50以上のAIプロバイダーを統一APIで集約。アクセス管理、コスト追跡、スケーリングを簡単に。",
+    "Aggregation bucket": "集計バケット",
     "AGPL v3.0 License": "AGPL v3.0ライセンス",
     "AI model testing environment": "AIモデルテスト環境",
     "AI models": "AIモデル",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "回路ブレーカーメカニズム付きで最適な利用可能なグループを自動的に選択",
     "Automatically sync model list when upstream changes are detected": "アップストリームの変更が検出されたときにモデルリストを自動的に同期",
     "Automatically test channels and notify users when limits are hit": "チャネルを自動的にテストし、制限に達したときにユーザーに通知する",
+    "Availability (last 24h)": "可用性(過去 24 時間)",
     "Available": "空き",
     "Available disk space": "利用可能なディスク容量",
     "Available Models": "利用可能なモデル",
     "Available Rewards": "利用可能な報酬",
+    "Average latency": "平均レイテンシ",
+    "Average latency, TTFT, and success rate by group": "グループ別の平均レイテンシ、TTFT、成功率",
     "Average RPM": "平均RPM",
+    "Average time-to-first-token (TTFT) by group": "グループ別の平均 Time to First Token(TTFT)",
     "Average tokens per second sustained per group": "グループごとに持続する平均スループット (tokens/秒)",
     "Average TPM": "平均TPM",
+    "Average TTFT": "平均 TTFT",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude 互換テンプレート",
     "AWS Key Format": "AWSキーフォーマット",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Collapse": "折りたたむ",
     "Collapse All": "すべて折りたたむ",
+    "Collect relay latency and success-rate metrics for the model square.": "モデル広場向けに Relay のレイテンシと成功率メトリクスを収集します。",
     "Color": "カラー",
     "Color is required": "色は必須です",
     "Color preset": "カラープリセット",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "io.net デプロイを有効化",
     "Enable io.net model deployment service in console": "コンソールで io.net モデルデプロイサービスを有効化",
     "Enable LinuxDO OAuth": "LinuxDO OAuthを有効にする",
+    "Enable model performance metrics": "モデル性能メトリクスを有効化",
     "Enable OIDC": "OIDCを有効にする",
     "Enable or disable this channel": "このチャネルを有効または無効にする",
     "Enable or disable this model": "このモデルを有効または無効にする",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "固定価格 (USD)",
     "Floating": "フローティング",
     "FluentRead extension not detected. Please ensure it is installed and active.": "FluentRead 拡張機能が検出されませんでした。インストールされていて有効になっていることを確認してください。",
+    "Flush interval (minutes)": "書き込み間隔(分)",
     "Follow the guided steps to prepare your workspace before the first login.": "初回ログイン前に、ガイド付きの手順に従ってワークスペースを準備してください。",
     "Footer": "フッター",
     "Footer text displayed at the bottom of pages": "ページ下部に表示されるフッターテキスト",
@@ -2221,6 +2235,7 @@
     "Model name is required": "モデル名は必須です",
     "Model names copied to clipboard": "モデル名がクリップボードにコピーされました",
     "Model not found": "モデルが見つかりません",
+    "Model performance metrics": "モデル性能メトリクス",
     "Model Price": "モデル価格",
     "Model Price Not Configured": "モデル価格が未設定",
     "Model Pricing": "モデル料金",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "検索に一致するグループがありません",
     "No header overrides configured.": "ヘッダーのオーバーライドが設定されていません。",
     "No history data available": "履歴データがありません",
+    "No incidents in the last 24 hours": "過去 24 時間にインシデントはありません",
     "No incidents in the last 30 days": "過去 30 日間でインシデントはありません",
     "No Inviter": "招待者なし",
     "No keys found": "キーが見つかりません",
@@ -3106,6 +3122,8 @@
     "Request Model:": "リクエストモデル:",
     "Request overrides, routing behavior, and upstream model automation": "リクエスト上書き、ルーティング動作、上流モデル自動化",
     "Request rule pricing": "リクエストルールの課金",
+    "Request success rate sampled over the last 24 hours": "過去 24 時間にサンプリングされたリクエスト成功率",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "リクエスト成功率;過去 24 時間に {{incidents}} 個のインシデント時間枠",
     "Request timed out, please refresh and restart GitHub login": "タイムアウトしました。ページをリロードして GitHub ログインをやり直してください",
     "Request-based": "リクエスト条件あり",
     "Requests (24h)": "リクエスト (24h)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "ユーザーモデルのリクエスト頻度を制限する(高並行性パフォーマンスに影響を与える可能性があります)",
     "Retain last N days": "最新N日間を保持",
     "Retain last N files": "最新N個のファイルを保持",
+    "Retention days": "保持日数",
     "Retry": "再試行",
     "Retry Chain": "リトライチェーン",
     "Retry Suggestion": "リトライ提案",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "サブスクリプションプラン",
     "Subtract": "減算",
     "Success": "成功",
+    "Success rate": "成功率",
     "Successfully created {{count}} API Key(s)": "{{count}}個のAPIキーが正常に作成されました",
     "Successfully created {{count}} redemption codes": "{{count}}件の引き換えコードが正常に作成されました",
     "Successfully deleted {{count}} API key(s)": "{{count}}個のAPIキーが正常に削除されました",

+ 20 - 0
web/default/src/i18n/locales/ru.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "Удалено {{count}} отключённых каналов",
     "{{count}} hours ago": "{{count}} часов назад",
     "{{count}} incidents": "{{count}} инцидентов",
+    "{{count}} incidents in the last 24 hours": "{{count}} инцидентов за последние 24 часа",
     "{{count}} incidents in the last 30 days": "{{count}} инцидентов за последние 30 дней",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} log entries removed.": "Удалено {{count}} записей журнала.",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 Ваша Компания. Все права защищены.",
     "+{{count}} more": "ещё {{count}}",
     "| Based on": "| На основе",
+    "0 means data is kept permanently": "0 означает, что данные хранятся постоянно",
     "0 means unlimited": "0 означает без ограничений",
     "1 Day": "1 день",
     "1 day ago": "1 день назад",
+    "1 hour": "1 час",
     "1 Hour": "1 ч.",
     "1 hour ago": "1 час назад",
+    "1 minute": "1 минута",
     "1 minute ago": "1 минуту назад",
     "1 Month": "1 мес.",
     "1 month ago": "1 месяц назад",
@@ -86,6 +90,7 @@
     "30 Days": "30 дней",
     "30 days ago": "30 дней назад",
     "30d change": "Изменение за 30 дней",
+    "5 minutes": "5 минут",
     "5-Hour Window": "5-часовое окно",
     "50 / page": "50 / страница",
     "7 Days": "7 дней",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "Агрегированный трафик по поставщикам моделей",
     "Aggregated usage metrics and trend charts.": "Агрегированные метрики использования и графики трендов.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "объединяет 50+ ИИ-провайдеров за единым API. Управляйте доступом, отслеживайте затраты и масштабируйтесь без усилий.",
+    "Aggregation bucket": "Интервал агрегации",
     "AGPL v3.0 License": "Лицензия AGPL v3.0",
     "AI model testing environment": "Среда тестирования ИИ моделей",
     "AI models": "Модели ИИ",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "Автоматически выбирает лучшую доступную группу с механизмом circuit breaker",
     "Automatically sync model list when upstream changes are detected": "Автоматически синхронизировать список моделей при обнаружении изменений у провайдера",
     "Automatically test channels and notify users when limits are hit": "Автоматически тестировать каналы и уведомлять пользователей при достижении лимитов",
+    "Availability (last 24h)": "Доступность (последние 24 ч)",
     "Available": "Доступно",
     "Available disk space": "Доступное дисковое пространство",
     "Available Models": "Доступные модели",
     "Available Rewards": "Доступные награды",
+    "Average latency": "Средняя задержка",
+    "Average latency, TTFT, and success rate by group": "Средняя задержка, TTFT и доля успешных запросов по группам",
     "Average RPM": "Среднее число оборотов в минуту",
+    "Average time-to-first-token (TTFT) by group": "Среднее время до первого токена (TTFT) по группам",
     "Average tokens per second sustained per group": "Средняя устойчивая пропускная способность (токенов/с) по группам",
     "Average TPM": "Среднее число транзакций в минуту",
+    "Average TTFT": "Средний TTFT",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude совместимость",
     "AWS Key Format": "Формат ключа AWS",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Collapse": "Свернуть",
     "Collapse All": "Свернуть все",
+    "Collect relay latency and success-rate metrics for the model square.": "Собирает метрики задержки Relay и доли успешных запросов для витрины моделей.",
     "Color": "Цвет",
     "Color is required": "Цвет обязателен",
     "Color preset": "Цветовая предустановка",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "Включить развертывания io.net",
     "Enable io.net model deployment service in console": "Включить сервис развертывания моделей io.net в консоли",
     "Enable LinuxDO OAuth": "Включить LinuxDO OAuth",
+    "Enable model performance metrics": "Включить метрики производительности моделей",
     "Enable OIDC": "Включить OIDC",
     "Enable or disable this channel": "Включить или отключить этот канал",
     "Enable or disable this model": "Включить или отключить эту модель",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "Фиксированная цена (USD)",
     "Floating": "Плавающая",
     "FluentRead extension not detected. Please ensure it is installed and active.": "Расширение FluentRead не обнаружено. Убедитесь, что оно установлено и активно.",
+    "Flush interval (minutes)": "Интервал записи (минуты)",
     "Follow the guided steps to prepare your workspace before the first login.": "Следуйте пошаговым инструкциям, чтобы подготовить рабочее пространство перед первым входом.",
     "Footer": "Подвал",
     "Footer text displayed at the bottom of pages": "Текст нижнего колонтитула, отображаемый внизу страниц",
@@ -2221,6 +2235,7 @@
     "Model name is required": "Название модели обязательно",
     "Model names copied to clipboard": "Названия моделей скопированы в буфер обмена",
     "Model not found": "Модель не найдена",
+    "Model performance metrics": "Метрики производительности моделей",
     "Model Price": "Цена модели",
     "Model Price Not Configured": "Цена модели не настроена",
     "Model Pricing": "Цены на модели",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "Нет групп, соответствующих вашему поиску",
     "No header overrides configured.": "Нет настроенных переопределений заголовков.",
     "No history data available": "Исторические данные недоступны",
+    "No incidents in the last 24 hours": "За последние 24 часа инцидентов не было",
     "No incidents in the last 30 days": "За последние 30 дней инцидентов не было",
     "No Inviter": "Нет пригласившего",
     "No keys found": "Ключи не найдены",
@@ -3106,6 +3122,8 @@
     "Request Model:": "Модель запроса:",
     "Request overrides, routing behavior, and upstream model automation": "Переопределения запросов, маршрутизация и автоматизация upstream-моделей",
     "Request rule pricing": "Правила ценообразования по запросу",
+    "Request success rate sampled over the last 24 hours": "Доля успешных запросов по выборкам за последние 24 часа",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Доля успешных запросов; {{incidents}} интервалов с инцидентами за последние 24 часа",
     "Request timed out, please refresh and restart GitHub login": "Время ожидания истекло, обновите страницу и снова запустите вход через GitHub",
     "Request-based": "Зависит от запроса",
     "Requests (24h)": "Запросы (24 ч)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "Ограничить частоту запросов пользовательских моделей (может повлиять на производительность при высокой конкуренции)",
     "Retain last N days": "Хранить последние N дней",
     "Retain last N files": "Хранить последние N файлов",
+    "Retention days": "Дней хранения",
     "Retry": "Повторить попытку",
     "Retry Chain": "Цепочка повторов",
     "Retry Suggestion": "Рекомендация по повтору",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "Планы подписки",
     "Subtract": "Вычесть",
     "Success": "Успешно",
+    "Success rate": "Доля успешных запросов",
     "Successfully created {{count}} API Key(s)": "Успешно создано {{count}} API-ключ(а/ей)",
     "Successfully created {{count}} redemption codes": "Успешно создано {{count}} кодов активации",
     "Successfully deleted {{count}} API key(s)": "Успешно удалено {{count}} API-ключ(а/ей)",

+ 20 - 0
web/default/src/i18n/locales/vi.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "Đã xóa {{count}} kênh đã tắt",
     "{{count}} hours ago": "{{count}} giờ trước",
     "{{count}} incidents": "{{count}} sự cố",
+    "{{count}} incidents in the last 24 hours": "{{count}} sự cố trong 24 giờ qua",
     "{{count}} incidents in the last 30 days": "{{count}} sự cố trong 30 ngày qua",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} log entries removed.": "Đã xóa {{count}} mục nhật ký.",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 Công ty của bạn. Mọi quyền được bảo lưu.",
     "+{{count}} more": "thêm {{count}} mục",
     "| Based on": "| Dựa trên",
+    "0 means data is kept permanently": "0 nghĩa là dữ liệu được giữ vĩnh viễn",
     "0 means unlimited": "0 có nghĩa là không giới hạn",
     "1 Day": "1 ngày",
     "1 day ago": "1 ngày trước",
+    "1 hour": "1 giờ",
     "1 Hour": "1 giờ",
     "1 hour ago": "1 giờ trước",
+    "1 minute": "1 phút",
     "1 minute ago": "1 phút trước",
     "1 Month": "1 tháng",
     "1 month ago": "1 tháng trước",
@@ -86,6 +90,7 @@
     "30 Days": "30 ngày",
     "30 days ago": "30 ngày trước",
     "30d change": "Thay đổi 30 ngày",
+    "5 minutes": "5 phút",
     "5-Hour Window": "Cửa sổ 5 giờ",
     "50 / page": "50 / trang",
     "7 Days": "7 ngày",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "Lưu lượng tổng hợp theo nhà cung cấp mô hình",
     "Aggregated usage metrics and trend charts.": "Chỉ số sử dụng tổng hợp và biểu đồ xu hướng.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "tổng hợp hơn 50 nhà cung cấp AI sau một API thống nhất. Quản lý truy cập, theo dõi chi phí và mở rộng dễ dàng.",
+    "Aggregation bucket": "Khoảng tổng hợp",
     "AGPL v3.0 License": "Giấy phép AGPL v3.0",
     "AI model testing environment": "Môi trường thử nghiệm mô hình AI",
     "AI models": "mô hình AI",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "Tự động chọn nhóm tốt nhất hiện có với cơ chế ngắt mạch",
     "Automatically sync model list when upstream changes are detected": "Tự động đồng bộ danh sách mô hình khi phát hiện thay đổi từ nguồn",
     "Automatically test channels and notify users when limits are hit": "Tự động kiểm tra các kênh và thông báo cho người dùng khi đạt đến giới hạn",
+    "Availability (last 24h)": "Khả dụng (24 giờ qua)",
     "Available": "Khả dụng",
     "Available disk space": "Dung lượng đĩa khả dụng",
     "Available Models": "Mô hình khả dụng",
     "Available Rewards": "Phần thưởng hiện có",
+    "Average latency": "Độ trễ trung bình",
+    "Average latency, TTFT, and success rate by group": "Độ trễ trung bình, TTFT và tỷ lệ thành công theo nhóm",
     "Average RPM": "RPM trung bình",
+    "Average time-to-first-token (TTFT) by group": "Thời gian trung bình tới token đầu tiên (TTFT) theo nhóm",
     "Average tokens per second sustained per group": "Số token mỗi giây trung bình duy trì cho từng nhóm",
     "Average TPM": "TPM trung bình",
+    "Average TTFT": "TTFT trung bình",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude tương thích",
     "AWS Key Format": "Định dạng khóa AWS",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Collapse": "Thu gọn",
     "Collapse All": "Thu gọn tất cả",
+    "Collect relay latency and success-rate metrics for the model square.": "Thu thập độ trễ Relay và tỷ lệ thành công cho quảng trường mô hình.",
     "Color": "Màu",
     "Color is required": "Màu sắc là bắt buộc",
     "Color preset": "Cài đặt màu sẵn",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "Bật triển khai io.net",
     "Enable io.net model deployment service in console": "Bật dịch vụ triển khai mô hình io.net trong bảng điều khiển",
     "Enable LinuxDO OAuth": "Bật LinuxDO OAuth",
+    "Enable model performance metrics": "Bật chỉ số hiệu năng mô hình",
     "Enable OIDC": "Bật OIDC",
     "Enable or disable this channel": "Bật hoặc tắt kênh này",
     "Enable or disable this model": "Bật hoặc tắt mô hình này",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "Giá cố định (USD)",
     "Floating": "Nổi",
     "FluentRead extension not detected. Please ensure it is installed and active.": "Không phát hiện tiện ích mở rộng FluentRead. Vui lòng đảm bảo nó đã được cài đặt và kích hoạt.",
+    "Flush interval (minutes)": "Khoảng ghi xuống DB (phút)",
     "Follow the guided steps to prepare your workspace before the first login.": "Thực hiện theo các bước hướng dẫn để chuẩn bị không gian làm việc của bạn trước lần đăng nhập đầu tiên.",
     "Footer": "Chân trang",
     "Footer text displayed at the bottom of pages": "Văn bản chân trang hiển thị ở cuối các trang",
@@ -2221,6 +2235,7 @@
     "Model name is required": "Tên mô hình là bắt buộc",
     "Model names copied to clipboard": "Tên mô hình đã được sao chép vào bộ nhớ tạm",
     "Model not found": "Không tìm thấy mô hình",
+    "Model performance metrics": "Chỉ số hiệu năng mô hình",
     "Model Price": "Giá mô hình",
     "Model Price Not Configured": "Giá mô hình chưa được cấu hình",
     "Model Pricing": "Bảng giá mô hình",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "Không có nhóm nào khớp với tìm kiếm của bạn",
     "No header overrides configured.": "Không có ghi đè tiêu đề nào được cấu hình.",
     "No history data available": "Không có dữ liệu lịch sử",
+    "No incidents in the last 24 hours": "Không có sự cố trong 24 giờ qua",
     "No incidents in the last 30 days": "Không có sự cố trong 30 ngày qua",
     "No Inviter": "Không có người mời",
     "No keys found": "Không tìm thấy khóa",
@@ -3106,6 +3122,8 @@
     "Request Model:": "Mô hình yêu cầu:",
     "Request overrides, routing behavior, and upstream model automation": "Ghi đè yêu cầu, hành vi định tuyến và tự động hóa mô hình upstream",
     "Request rule pricing": "Quy tắc tính giá theo request",
+    "Request success rate sampled over the last 24 hours": "Tỷ lệ yêu cầu thành công được lấy mẫu trong 24 giờ qua",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Tỷ lệ yêu cầu thành công; {{incidents}} khoảng có sự cố trong 24 giờ qua",
     "Request timed out, please refresh and restart GitHub login": "Yêu cầu đã hết thời gian chờ, vui lòng làm mới và đăng nhập lại GitHub",
     "Request-based": "Theo yêu cầu",
     "Requests (24h)": "Yêu cầu (24h)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "Hạn chế tần suất yêu cầu mô hình người dùng (có thể ảnh hưởng đến hiệu suất khi có độ đồng thời cao)",
     "Retain last N days": "Giữ lại N ngày gần nhất",
     "Retain last N files": "Giữ lại N tệp gần nhất",
+    "Retention days": "Số ngày lưu giữ",
     "Retry": "Thử lại",
     "Retry Chain": "Chuỗi thử lại",
     "Retry Suggestion": "Gợi ý thử lại",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "Gói đăng ký",
     "Subtract": "Trừ",
     "Success": "Thành công",
+    "Success rate": "Tỷ lệ thành công",
     "Successfully created {{count}} API Key(s)": "Đã tạo thành công {{count}} khóa API",
     "Successfully created {{count}} redemption codes": "Đã tạo thành công {{count}} mã đổi thưởng",
     "Successfully deleted {{count}} API key(s)": "Đã xóa thành công {{count}} khóa API",

+ 20 - 0
web/default/src/i18n/locales/zh.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "已删除 {{count}} 个已禁用的渠道",
     "{{count}} hours ago": "{{count}} 小时前",
     "{{count}} incidents": "{{count}} 起事件",
+    "{{count}} incidents in the last 24 hours": "最近 24 小时 {{count}} 个异常桶",
     "{{count}} incidents in the last 30 days": "最近 30 天 {{count}} 起事件",
     "{{count}} IP(s)": "{{count}} 个 IP",
     "{{count}} log entries removed.": "已删除 {{count}} 条日志。",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 您的公司。保留所有权利。",
     "+{{count}} more": "还有 {{count}} 项",
     "| Based on": "| 基于",
+    "0 means data is kept permanently": "0 表示永久保留数据",
     "0 means unlimited": "0 表示不限",
     "1 Day": "1 天",
     "1 day ago": "1 天前",
+    "1 hour": "1 小时",
     "1 Hour": "1 小时",
     "1 hour ago": "1 小时前",
+    "1 minute": "1 分钟",
     "1 minute ago": "1 分钟前",
     "1 Month": "1 个月",
     "1 month ago": "1 个月前",
@@ -86,6 +90,7 @@
     "30 Days": "30 天",
     "30 days ago": "30 天前",
     "30d change": "30 天变化",
+    "5 minutes": "5 分钟",
     "5-Hour Window": "5小时窗口",
     "50 / page": "50 条/页",
     "7 Days": "7 天",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "按上游模型提供商聚合的流量",
     "Aggregated usage metrics and trend charts.": "聚合使用指标和趋势图表。",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "聚合 50+ AI 提供商于统一 API 之后。轻松管理访问、追踪成本、弹性扩展。",
+    "Aggregation bucket": "聚合时间桶",
     "AGPL v3.0 License": "AGPL v3.0 协议",
     "AI model testing environment": "AI模型测试环境",
     "AI models": "AI 模型",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "自动选择可用分组,失败时触发熔断切换",
     "Automatically sync model list when upstream changes are detected": "检测到上游模型变更时自动同步模型列表",
     "Automatically test channels and notify users when limits are hit": "自动测试渠道并在达到限制时通知用户",
+    "Availability (last 24h)": "可用率(最近 24 小时)",
     "Available": "可用",
     "Available disk space": "可用磁盘空间",
     "Available Models": "可用模型",
     "Available Rewards": "可用奖励",
+    "Average latency": "平均延迟",
+    "Average latency, TTFT, and success rate by group": "各分组的平均延迟、首 Token 延迟和成功率",
     "Average RPM": "平均 RPM",
+    "Average time-to-first-token (TTFT) by group": "各分组的平均首 Token 延迟(TTFT)",
     "Average tokens per second sustained per group": "各分组持续输出的平均每秒 token 数",
     "Average TPM": "平均 TPM",
+    "Average TTFT": "平均首 Token 延迟",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude 兼容模板",
     "AWS Key Format": "AWS 密钥格式",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Collapse": "收起",
     "Collapse All": "全部收起",
+    "Collect relay latency and success-rate metrics for the model square.": "收集 Relay 延迟和成功率指标,用于模型广场展示。",
     "Color": "颜色",
     "Color is required": "颜色为必填项",
     "Color preset": "颜色预设",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "启用 io.net 部署",
     "Enable io.net model deployment service in console": "在控制台启用 io.net 模型部署服务",
     "Enable LinuxDO OAuth": "启用 LinuxDO OAuth",
+    "Enable model performance metrics": "启用模型性能指标",
     "Enable OIDC": "启用 OIDC",
     "Enable or disable this channel": "启用或禁用此渠道",
     "Enable or disable this model": "启用或禁用此模型",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "固定价格 (USD)",
     "Floating": "浮动",
     "FluentRead extension not detected. Please ensure it is installed and active.": "未检测到 FluentRead 扩展。请确保已安装并激活。",
+    "Flush interval (minutes)": "刷库间隔(分钟)",
     "Follow the guided steps to prepare your workspace before the first login.": "请按照引导步骤在首次登录前准备您的工作区。",
     "Footer": "页脚",
     "Footer text displayed at the bottom of pages": "显示在页面底部的页脚文本",
@@ -2221,6 +2235,7 @@
     "Model name is required": "模型名称为必填项",
     "Model names copied to clipboard": "模型名称已复制到剪贴板",
     "Model not found": "模型未找到",
+    "Model performance metrics": "模型性能指标",
     "Model Price": "模型价格",
     "Model Price Not Configured": "模型价格未配置",
     "Model Pricing": "模型定价",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "没有组匹配您的搜索",
     "No header overrides configured.": "未配置标头覆盖。",
     "No history data available": "暂无历史数据",
+    "No incidents in the last 24 hours": "最近 24 小时无异常",
     "No incidents in the last 30 days": "最近 30 天无事件",
     "No Inviter": "无邀请人",
     "No keys found": "未找到密钥",
@@ -3106,6 +3122,8 @@
     "Request Model:": "请求模型:",
     "Request overrides, routing behavior, and upstream model automation": "请求覆盖、路由行为和上游模型自动化",
     "Request rule pricing": "请求规则计费",
+    "Request success rate sampled over the last 24 hours": "最近 24 小时按时间桶采样的请求成功率",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "请求成功率;最近 24 小时 {{incidents}} 个异常桶",
     "Request timed out, please refresh and restart GitHub login": "请求超时,请刷新页面后重新发起 GitHub 登录",
     "Request-based": "含请求条件",
     "Requests (24h)": "请求数(24 小时)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "限制用户模型请求频率(可能会影响高并发性能)",
     "Retain last N days": "保留最近N天",
     "Retain last N files": "保留最近 N 个文件",
+    "Retention days": "保留天数",
     "Retry": "重试",
     "Retry Chain": "重试链路",
     "Retry Suggestion": "重试建议",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "订阅套餐",
     "Subtract": "减少",
     "Success": "成功",
+    "Success rate": "成功率",
     "Successfully created {{count}} API Key(s)": "成功创建了 {{count}} 个 API 密钥",
     "Successfully created {{count}} redemption codes": "成功创建了 {{count}} 个兑换码",
     "Successfully deleted {{count}} API key(s)": "成功删除了 {{count}} 个 API 密钥",