1 неделя назад · 9acf5fecae
--- a/controller/perf_metrics.go
+++ b/controller/perf_metrics.go
@@ -0,0 +1,46 @@
 
															+package controller
														
 
															+
														
 
															+import (
														
 
															+	"net/http"
														
 
															+	"strconv"
														
 
															+
														
 
															+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
														
 
															+
														
 
															+	"github.com/gin-gonic/gin"
														
 
															+)
														
 
															+
														
 
															+func GetPerfMetrics(c *gin.Context) {
														
 
															+	modelName := c.Query("model")
														
 
															+	if modelName == "" {
														
 
															+		c.JSON(http.StatusBadRequest, gin.H{
														
 
															+			"success": false,
														
 
															+			"message": "model is required",
														
 
															+		})
														
 
															+		return
														
 
															+	}
														
 
															+
														
 
															+	hours := 24
														
 
															+	if rawHours := c.Query("hours"); rawHours != "" {
														
 
															+		if parsed, err := strconv.Atoi(rawHours); err == nil {
														
 
															+			hours = parsed
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	result, err := perfmetrics.Query(perfmetrics.QueryParams{
														
 
															+		Model: modelName,
														
 
															+		Group: c.Query("group"),
														
 
															+		Hours: hours,
														
 
															+	})
														
 
															+	if err != nil {
														
 
															+		c.JSON(http.StatusInternalServerError, gin.H{
														
 
															+			"success": false,
														
 
															+			"message": err.Error(),
														
 
															+		})
														
 
															+		return
														
 
															+	}
														
 
															+
														
 
															+	c.JSON(http.StatusOK, gin.H{
														
 
															+		"success": true,
														
 
															+		"data":    result,
														
 
															+	})
														
 
															+}
														
--- a/controller/relay.go
+++ b/controller/relay.go
@@ -15,6 +15,7 @@ import (
 
															 	"github.com/QuantumNous/new-api/logger"
														
 
															 	"github.com/QuantumNous/new-api/middleware"
														
 
															 	"github.com/QuantumNous/new-api/model"
														
 
															+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
														
 
															 	"github.com/QuantumNous/new-api/relay"
														
 
															 	relaycommon "github.com/QuantumNous/new-api/relay/common"
														
 
															 	relayconstant "github.com/QuantumNous/new-api/relay/constant"
														
@@ -239,6 +240,11 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
 
															 		retryLogStr := fmt.Sprintf("重试：%s", strings.Trim(strings.Join(strings.Fields(fmt.Sprint(useChannel)), "->"), "[]"))
														
 
															 		logger.LogInfo(c, retryLogStr)
														
 
															 	}
														
 
															+	if newAPIError != nil {
														
 
															+		gopool.Go(func() {
														
 
															+			perfmetrics.RecordRelaySample(relayInfo, false)
														
 
															+		})
														
 
															+	}
														
 
															 }
														
 
															 var upgrader = websocket.Upgrader{
														
--- a/main.go
+++ b/main.go
@@ -19,6 +19,7 @@ import (
 
															 	"github.com/QuantumNous/new-api/middleware"
														
 
															 	"github.com/QuantumNous/new-api/model"
														
 
															 	"github.com/QuantumNous/new-api/oauth"
														
 
															+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
														
 
															 	"github.com/QuantumNous/new-api/relay"
														
 
															 	"github.com/QuantumNous/new-api/router"
														
 
															 	"github.com/QuantumNous/new-api/service"
														
@@ -306,6 +307,8 @@ func InitResources() error {
 
															 		return err
														
 
															 	}
														
 
															+	perfmetrics.Init()
														
 
															+
														
 
															 	// 启动系统监控
														
 
															 	common.StartSystemMonitor()
														
--- a/model/main.go
+++ b/model/main.go
@@ -280,6 +280,7 @@ func migrateDB() error {
 
															 		&SubscriptionPreConsumeRecord{},
														
 
															 		&CustomOAuthProvider{},
														
 
															 		&UserOAuthBinding{},
														
 
															+		&PerfMetric{},
														
 
															 	)
														
 
															 	if err != nil {
														
 
															 		return err
														
@@ -328,6 +329,7 @@ func migrateDBFast() error {
 
															 		{&SubscriptionPreConsumeRecord{}, "SubscriptionPreConsumeRecord"},
														
 
															 		{&CustomOAuthProvider{}, "CustomOAuthProvider"},
														
 
															 		{&UserOAuthBinding{}, "UserOAuthBinding"},
														
 
															+		{&PerfMetric{}, "PerfMetric"},
														
 
															 	}
														
 
															 	// 动态计算migration数量，确保errChan缓冲区足够大
														
 
															 	errChan := make(chan error, len(migrations))
														
--- a/model/perf_metric.go
+++ b/model/perf_metric.go
@@ -0,0 +1,70 @@
 
															+package model
														
 
															+
														
 
															+import (
														
 
															+	"time"
														
 
															+
														
 
															+	"gorm.io/gorm"
														
 
															+	"gorm.io/gorm/clause"
														
 
															+)
														
 
															+
														
 
															+// PerfMetric stores aggregated relay performance metrics for the model square.
														
 
															+type PerfMetric struct {
														
 
															+	Id             int    `json:"id" gorm:"primaryKey"`
														
 
															+	ModelName      string `json:"model_name" gorm:"size:128;uniqueIndex:idx_perf_model_group_bucket,priority:1"`
														
 
															+	Group          string `json:"group" gorm:"column:group;size:64;uniqueIndex:idx_perf_model_group_bucket,priority:2"`
														
 
															+	BucketTs       int64  `json:"bucket_ts" gorm:"uniqueIndex:idx_perf_model_group_bucket,priority:3;index:idx_perf_bucket_ts"`
														
 
															+	RequestCount   int64  `json:"request_count" gorm:"default:0"`
														
 
															+	SuccessCount   int64  `json:"success_count" gorm:"default:0"`
														
 
															+	TotalLatencyMs int64  `json:"total_latency_ms" gorm:"default:0"`
														
 
															+	TtftSumMs      int64  `json:"ttft_sum_ms" gorm:"default:0"`
														
 
															+	TtftCount      int64  `json:"ttft_count" gorm:"default:0"`
														
 
															+}
														
 
															+
														
 
															+func (PerfMetric) TableName() string {
														
 
															+	return "perf_metrics"
														
 
															+}
														
 
															+
														
 
															+func UpsertPerfMetric(metric *PerfMetric) error {
														
 
															+	if metric == nil || metric.RequestCount == 0 {
														
 
															+		return nil
														
 
															+	}
														
 
															+	return DB.Clauses(clause.OnConflict{
														
 
															+		Columns: []clause.Column{
														
 
															+			{Name: "model_name"},
														
 
															+			{Name: "group"},
														
 
															+			{Name: "bucket_ts"},
														
 
															+		},
														
 
															+		DoUpdates: clause.Assignments(map[string]interface{}{
														
 
															+			"request_count":    gorm.Expr("request_count + ?", metric.RequestCount),
														
 
															+			"success_count":    gorm.Expr("success_count + ?", metric.SuccessCount),
														
 
															+			"total_latency_ms": gorm.Expr("total_latency_ms + ?", metric.TotalLatencyMs),
														
 
															+			"ttft_sum_ms":      gorm.Expr("ttft_sum_ms + ?", metric.TtftSumMs),
														
 
															+			"ttft_count":       gorm.Expr("ttft_count + ?", metric.TtftCount),
														
 
															+		}),
														
 
															+	}).Create(metric).Error
														
 
															+}
														
 
															+
														
 
															+func GetPerfMetrics(modelName string, group string, startTs int64, endTs int64) ([]PerfMetric, error) {
														
 
															+	var metrics []PerfMetric
														
 
															+	query := DB.Model(&PerfMetric{}).
														
 
															+		Where("model_name = ? AND bucket_ts >= ? AND bucket_ts <= ?", modelName, startTs, endTs)
														
 
															+	if group != "" {
														
 
															+		query = query.Where(commonGroupCol+" = ?", group)
														
 
															+	}
														
 
															+	err := query.Order("bucket_ts ASC").Find(&metrics).Error
														
 
															+	return metrics, err
														
 
															+}
														
 
															+
														
 
															+func DeletePerfMetricsBefore(cutoffTs int64) error {
														
 
															+	if cutoffTs <= 0 {
														
 
															+		return nil
														
 
															+	}
														
 
															+	return DB.Where("bucket_ts < ?", cutoffTs).Delete(&PerfMetric{}).Error
														
 
															+}
														
 
															+
														
 
															+func PerfMetricStartTime(hours int) int64 {
														
 
															+	if hours <= 0 {
														
 
															+		hours = 24
														
 
															+	}
														
 
															+	return time.Now().Add(-time.Duration(hours) * time.Hour).Unix()
														
 
															+}
														
--- a/pkg/perf_metrics/flush.go
+++ b/pkg/perf_metrics/flush.go
@@ -0,0 +1,94 @@
 
															+package perfmetrics
														
 
															+
														
 
															+import (
														
 
															+	"fmt"
														
 
															+	"strconv"
														
 
															+	"time"
														
 
															+
														
 
															+	"github.com/QuantumNous/new-api/common"
														
 
															+	"github.com/QuantumNous/new-api/model"
														
 
															+	"github.com/QuantumNous/new-api/setting/perf_metrics_setting"
														
 
															+)
														
 
															+
														
 
															+func flushLoop() {
														
 
															+	for {
														
 
															+		interval := perf_metrics_setting.GetFlushIntervalMinutes()
														
 
															+		time.Sleep(time.Duration(interval) * time.Minute)
														
 
															+		setting := perf_metrics_setting.GetSetting()
														
 
															+		if !setting.Enabled {
														
 
															+			continue
														
 
															+		}
														
 
															+		flushCompletedBuckets()
														
 
															+		cleanupExpiredMetrics(setting.RetentionDays)
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func flushCompletedBuckets() {
														
 
															+	currentBucket := bucketStart(time.Now().Unix())
														
 
															+	hotBuckets.Range(func(key, value any) bool {
														
 
															+		k := key.(bucketKey)
														
 
															+		if k.bucketTs >= currentBucket {
														
 
															+			return true
														
 
															+		}
														
 
															+
														
 
															+		bucket := value.(*atomicBucket)
														
 
															+		drained := bucket.drain()
														
 
															+		if drained.requestCount == 0 {
														
 
															+			deleteOldEmptyBucket(k, key)
														
 
															+			return true
														
 
															+		}
														
 
															+
														
 
															+		err := model.UpsertPerfMetric(&model.PerfMetric{
														
 
															+			ModelName:      k.model,
														
 
															+			Group:          k.group,
														
 
															+			BucketTs:       k.bucketTs,
														
 
															+			RequestCount:   drained.requestCount,
														
 
															+			SuccessCount:   drained.successCount,
														
 
															+			TotalLatencyMs: drained.totalLatencyMs,
														
 
															+			TtftSumMs:      drained.ttftSumMs,
														
 
															+			TtftCount:      drained.ttftCount,
														
 
															+		})
														
 
															+		if err != nil {
														
 
															+			bucket.addCounters(drained)
														
 
															+			common.SysError(fmt.Sprintf("failed to flush perf metric bucket model=%s group=%s bucket=%d: %s", k.model, k.group, k.bucketTs, err.Error()))
														
 
															+			return true
														
 
															+		}
														
 
															+
														
 
															+		deleteOldEmptyBucket(k, key)
														
 
															+		return true
														
 
															+	})
														
 
															+}
														
 
															+
														
 
															+func deleteOldEmptyBucket(k bucketKey, rawKey any) {
														
 
															+	if k.bucketTs < bucketStart(time.Now().Add(-24*time.Hour).Unix()) {
														
 
															+		hotBuckets.Delete(rawKey)
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func cleanupExpiredMetrics(retentionDays int) {
														
 
															+	if retentionDays <= 0 {
														
 
															+		return
														
 
															+	}
														
 
															+	cutoff := time.Now().Add(-time.Duration(retentionDays) * 24 * time.Hour).Unix()
														
 
															+	if err := model.DeletePerfMetricsBefore(cutoff); err != nil {
														
 
															+		common.SysError("failed to cleanup expired perf metrics: " + err.Error())
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func redisCounters(values map[string]string) counters {
														
 
															+	return counters{
														
 
															+		requestCount:   parseRedisInt(values["req"]),
														
 
															+		successCount:   parseRedisInt(values["ok"]),
														
 
															+		totalLatencyMs: parseRedisInt(values["lat"]),
														
 
															+		ttftSumMs:      parseRedisInt(values["ttft"]),
														
 
															+		ttftCount:      parseRedisInt(values["ttft_n"]),
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func parseRedisInt(value string) int64 {
														
 
															+	if value == "" {
														
 
															+		return 0
														
 
															+	}
														
 
															+	parsed, _ := strconv.ParseInt(value, 10, 64)
														
 
															+	return parsed
														
 
															+}
														
--- a/pkg/perf_metrics/metrics.go
+++ b/pkg/perf_metrics/metrics.go
@@ -0,0 +1,261 @@
 
															+package perfmetrics
														
 
															+
														
 
															+import (
														
 
															+	"context"
														
 
															+	"fmt"
														
 
															+	"sort"
														
 
															+	"sync"
														
 
															+	"time"
														
 
															+
														
 
															+	"github.com/QuantumNous/new-api/common"
														
 
															+	"github.com/QuantumNous/new-api/model"
														
 
															+	relaycommon "github.com/QuantumNous/new-api/relay/common"
														
 
															+	"github.com/QuantumNous/new-api/setting/perf_metrics_setting"
														
 
															+)
														
 
															+
														
 
															+var hotBuckets sync.Map
														
 
															+
														
 
															+const seriesSchema = "dbcd0a3c01b55203"
														
 
															+
														
 
															+func Init() {
														
 
															+	go flushLoop()
														
 
															+}
														
 
															+
														
 
															+func RecordRelaySample(info *relaycommon.RelayInfo, success bool) {
														
 
															+	if info == nil {
														
 
															+		return
														
 
															+	}
														
 
															+	now := time.Now()
														
 
															+	hasTtft := info.IsStream && info.HasSendResponse()
														
 
															+	ttftMs := int64(0)
														
 
															+	if hasTtft {
														
 
															+		ttftMs = info.FirstResponseTime.Sub(info.StartTime).Milliseconds()
														
 
															+	}
														
 
															+	Record(Sample{
														
 
															+		Model:     info.OriginModelName,
														
 
															+		Group:     info.UsingGroup,
														
 
															+		LatencyMs: now.Sub(info.StartTime).Milliseconds(),
														
 
															+		TtftMs:    ttftMs,
														
 
															+		HasTtft:   hasTtft,
														
 
															+		Success:   success,
														
 
															+	})
														
 
															+}
														
 
															+
														
 
															+func Record(sample Sample) {
														
 
															+	setting := perf_metrics_setting.GetSetting()
														
 
															+	if !setting.Enabled || sample.Model == "" {
														
 
															+		return
														
 
															+	}
														
 
															+	if sample.Group == "" {
														
 
															+		sample.Group = "default"
														
 
															+	}
														
 
															+	if sample.LatencyMs < 0 {
														
 
															+		sample.LatencyMs = 0
														
 
															+	}
														
 
															+
														
 
															+	key := bucketKey{
														
 
															+		model:    sample.Model,
														
 
															+		group:    sample.Group,
														
 
															+		bucketTs: bucketStart(time.Now().Unix()),
														
 
															+	}
														
 
															+	actual, _ := hotBuckets.LoadOrStore(key, &atomicBucket{})
														
 
															+	actual.(*atomicBucket).add(sample)
														
 
															+	recordRedis(key, sample)
														
 
															+}
														
 
															+
														
 
															+func Query(params QueryParams) (QueryResult, error) {
														
 
															+	if params.Hours <= 0 {
														
 
															+		params.Hours = 24
														
 
															+	}
														
 
															+	if params.Hours > 24*30 {
														
 
															+		params.Hours = 24 * 30
														
 
															+	}
														
 
															+	endTs := time.Now().Unix()
														
 
															+	startTs := endTs - int64(params.Hours)*3600
														
 
															+
														
 
															+	merged := map[bucketKey]counters{}
														
 
															+	rows, err := model.GetPerfMetrics(params.Model, params.Group, startTs, endTs)
														
 
															+	if err != nil {
														
 
															+		return QueryResult{}, err
														
 
															+	}
														
 
															+	for _, row := range rows {
														
 
															+		mergeCounters(merged, bucketKey{
														
 
															+			model:    row.ModelName,
														
 
															+			group:    row.Group,
														
 
															+			bucketTs: row.BucketTs,
														
 
															+		}, counters{
														
 
															+			requestCount:   row.RequestCount,
														
 
															+			successCount:   row.SuccessCount,
														
 
															+			totalLatencyMs: row.TotalLatencyMs,
														
 
															+			ttftSumMs:      row.TtftSumMs,
														
 
															+			ttftCount:      row.TtftCount,
														
 
															+		})
														
 
															+	}
														
 
															+
														
 
															+	hotBuckets.Range(func(key, value any) bool {
														
 
															+		k := key.(bucketKey)
														
 
															+		if k.model != params.Model || k.bucketTs < startTs || k.bucketTs > endTs {
														
 
															+			return true
														
 
															+		}
														
 
															+		if params.Group != "" && k.group != params.Group {
														
 
															+			return true
														
 
															+		}
														
 
															+		mergeCounters(merged, k, value.(*atomicBucket).snapshot())
														
 
															+		return true
														
 
															+	})
														
 
															+
														
 
															+	return buildQueryResult(params.Model, merged), nil
														
 
															+}
														
 
															+
														
 
															+func bucketStart(ts int64) int64 {
														
 
															+	bucketSeconds := perf_metrics_setting.GetBucketSeconds()
														
 
															+	if bucketSeconds <= 0 {
														
 
															+		bucketSeconds = 3600
														
 
															+	}
														
 
															+	return ts - (ts % bucketSeconds)
														
 
															+}
														
 
															+
														
 
															+func mergeCounters(merged map[bucketKey]counters, key bucketKey, value counters) {
														
 
															+	if value.requestCount == 0 {
														
 
															+		return
														
 
															+	}
														
 
															+	current := merged[key]
														
 
															+	current.requestCount += value.requestCount
														
 
															+	current.successCount += value.successCount
														
 
															+	current.totalLatencyMs += value.totalLatencyMs
														
 
															+	current.ttftSumMs += value.ttftSumMs
														
 
															+	current.ttftCount += value.ttftCount
														
 
															+	merged[key] = current
														
 
															+}
														
 
															+
														
 
															+func buildQueryResult(modelName string, merged map[bucketKey]counters) QueryResult {
														
 
															+	groupBuckets := map[string]map[int64]counters{}
														
 
															+	for key, value := range merged {
														
 
															+		if value.requestCount == 0 {
														
 
															+			continue
														
 
															+		}
														
 
															+		if _, ok := groupBuckets[key.group]; !ok {
														
 
															+			groupBuckets[key.group] = map[int64]counters{}
														
 
															+		}
														
 
															+		groupBuckets[key.group][key.bucketTs] = value
														
 
															+	}
														
 
															+
														
 
															+	groups := make([]string, 0, len(groupBuckets))
														
 
															+	for group := range groupBuckets {
														
 
															+		groups = append(groups, group)
														
 
															+	}
														
 
															+	sort.Strings(groups)
														
 
															+
														
 
															+	results := make([]GroupResult, 0, len(groups))
														
 
															+	for _, group := range groups {
														
 
															+		buckets := groupBuckets[group]
														
 
															+		timestamps := make([]int64, 0, len(buckets))
														
 
															+		for ts := range buckets {
														
 
															+			timestamps = append(timestamps, ts)
														
 
															+		}
														
 
															+		sort.Slice(timestamps, func(i, j int) bool {
														
 
															+			return timestamps[i] < timestamps[j]
														
 
															+		})
														
 
															+
														
 
															+		total := counters{}
														
 
															+		series := make([]BucketPoint, 0, len(timestamps))
														
 
															+		for _, ts := range timestamps {
														
 
															+			value := buckets[ts]
														
 
															+			total.requestCount += value.requestCount
														
 
															+			total.successCount += value.successCount
														
 
															+			total.totalLatencyMs += value.totalLatencyMs
														
 
															+			total.ttftSumMs += value.ttftSumMs
														
 
															+			total.ttftCount += value.ttftCount
														
 
															+			series = append(series, bucketPoint(ts, value))
														
 
															+		}
														
 
															+
														
 
															+		results = append(results, GroupResult{
														
 
															+			Group:        group,
														
 
															+			AvgTtftMs:    avg(total.ttftSumMs, total.ttftCount),
														
 
															+			AvgLatencyMs: avg(total.totalLatencyMs, total.requestCount),
														
 
															+			SuccessRate:  successRate(total),
														
 
															+			RequestCount: total.requestCount,
														
 
															+			SuccessCount: total.successCount,
														
 
															+			TtftCount:    total.ttftCount,
														
 
															+			Series:       series,
														
 
															+		})
														
 
															+	}
														
 
															+
														
 
															+	return QueryResult{
														
 
															+		ModelName:    modelName,
														
 
															+		SeriesSchema: seriesSchema,
														
 
															+		Groups:       results,
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func bucketPoint(ts int64, value counters) BucketPoint {
														
 
															+	return BucketPoint{
														
 
															+		Ts:           ts,
														
 
															+		AvgTtftMs:    avg(value.ttftSumMs, value.ttftCount),
														
 
															+		AvgLatencyMs: avg(value.totalLatencyMs, value.requestCount),
														
 
															+		SuccessRate:  successRate(value),
														
 
															+		Count:        value.requestCount,
														
 
															+		SuccessCount: value.successCount,
														
 
															+		TtftCount:    value.ttftCount,
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func avg(sum int64, count int64) int64 {
														
 
															+	if count <= 0 {
														
 
															+		return 0
														
 
															+	}
														
 
															+	return sum / count
														
 
															+}
														
 
															+
														
 
															+func successRate(value counters) float64 {
														
 
															+	if value.requestCount <= 0 {
														
 
															+		return 0
														
 
															+	}
														
 
															+	return float64(value.successCount) / float64(value.requestCount) * 100
														
 
															+}
														
 
															+
														
 
															+func recordRedis(key bucketKey, sample Sample) {
														
 
															+	if !common.RedisEnabled || common.RDB == nil {
														
 
															+		return
														
 
															+	}
														
 
															+	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
														
 
															+	defer cancel()
														
 
															+
														
 
															+	redisKey := redisBucketKey(key)
														
 
															+	pipe := common.RDB.TxPipeline()
														
 
															+	pipe.HIncrBy(ctx, redisKey, "req", 1)
														
 
															+	if sample.Success {
														
 
															+		pipe.HIncrBy(ctx, redisKey, "ok", 1)
														
 
															+	}
														
 
															+	if sample.LatencyMs > 0 {
														
 
															+		pipe.HIncrBy(ctx, redisKey, "lat", sample.LatencyMs)
														
 
															+	}
														
 
															+	if sample.HasTtft && sample.TtftMs >= 0 {
														
 
															+		pipe.HIncrBy(ctx, redisKey, "ttft", sample.TtftMs)
														
 
															+		pipe.HIncrBy(ctx, redisKey, "ttft_n", 1)
														
 
															+	}
														
 
															+	pipe.Expire(ctx, redisKey, time.Hour)
														
 
															+	_, _ = pipe.Exec(ctx)
														
 
															+}
														
 
															+
														
 
															+func mergeRedisActiveBuckets(merged map[bucketKey]counters, params QueryParams, startTs int64, endTs int64) {
														
 
															+	if !common.RedisEnabled || common.RDB == nil || params.Model == "" || params.Group == "" {
														
 
															+		return
														
 
															+	}
														
 
															+	active := bucketStart(time.Now().Unix())
														
 
															+	if active < startTs || active > endTs {
														
 
															+		return
														
 
															+	}
														
 
															+	key := bucketKey{model: params.Model, group: params.Group, bucketTs: active}
														
 
															+	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
														
 
															+	defer cancel()
														
 
															+	values, err := common.RDB.HGetAll(ctx, redisBucketKey(key)).Result()
														
 
															+	if err != nil || len(values) == 0 {
														
 
															+		return
														
 
															+	}
														
 
															+	mergeCounters(merged, key, redisCounters(values))
														
 
															+}
														
 
															+
														
 
															+func redisBucketKey(key bucketKey) string {
														
 
															+	return fmt.Sprintf("perf:%s:%s:%d", key.model, key.group, key.bucketTs)
														
 
															+}
														
--- a/pkg/perf_metrics/types.go
+++ b/pkg/perf_metrics/types.go
@@ -0,0 +1,124 @@
 
															+package perfmetrics
														
 
															+
														
 
															+import "sync/atomic"
														
 
															+
														
 
															+type Store interface {
														
 
															+	Record(sample Sample)
														
 
															+	Query(params QueryParams) (QueryResult, error)
														
 
															+}
														
 
															+
														
 
															+type Sample struct {
														
 
															+	Model     string
														
 
															+	Group     string
														
 
															+	LatencyMs int64
														
 
															+	TtftMs    int64
														
 
															+	HasTtft   bool
														
 
															+	Success   bool
														
 
															+}
														
 
															+
														
 
															+type QueryParams struct {
														
 
															+	Model string
														
 
															+	Group string
														
 
															+	Hours int
														
 
															+}
														
 
															+
														
 
															+type BucketPoint struct {
														
 
															+	Ts           int64   `json:"ts"`
														
 
															+	AvgTtftMs    int64   `json:"avg_ttft_ms"`
														
 
															+	AvgLatencyMs int64   `json:"avg_latency_ms"`
														
 
															+	SuccessRate  float64 `json:"success_rate"`
														
 
															+	Count        int64   `json:"count"`
														
 
															+	SuccessCount int64   `json:"success_count"`
														
 
															+	TtftCount    int64   `json:"ttft_count"`
														
 
															+}
														
 
															+
														
 
															+type GroupResult struct {
														
 
															+	Group        string        `json:"group"`
														
 
															+	AvgTtftMs    int64         `json:"avg_ttft_ms"`
														
 
															+	AvgLatencyMs int64         `json:"avg_latency_ms"`
														
 
															+	SuccessRate  float64       `json:"success_rate"`
														
 
															+	RequestCount int64         `json:"request_count"`
														
 
															+	SuccessCount int64         `json:"success_count"`
														
 
															+	TtftCount    int64         `json:"ttft_count"`
														
 
															+	Series       []BucketPoint `json:"series"`
														
 
															+}
														
 
															+
														
 
															+type QueryResult struct {
														
 
															+	ModelName    string        `json:"model_name"`
														
 
															+	SeriesSchema string        `json:"series_schema"`
														
 
															+	Groups       []GroupResult `json:"groups"`
														
 
															+}
														
 
															+
														
 
															+type bucketKey struct {
														
 
															+	model    string
														
 
															+	group    string
														
 
															+	bucketTs int64
														
 
															+}
														
 
															+
														
 
															+type counters struct {
														
 
															+	requestCount   int64
														
 
															+	successCount   int64
														
 
															+	totalLatencyMs int64
														
 
															+	ttftSumMs      int64
														
 
															+	ttftCount      int64
														
 
															+}
														
 
															+
														
 
															+type atomicBucket struct {
														
 
															+	requestCount   atomic.Int64
														
 
															+	successCount   atomic.Int64
														
 
															+	totalLatencyMs atomic.Int64
														
 
															+	ttftSumMs      atomic.Int64
														
 
															+	ttftCount      atomic.Int64
														
 
															+}
														
 
															+
														
 
															+func (b *atomicBucket) add(sample Sample) {
														
 
															+	b.requestCount.Add(1)
														
 
															+	if sample.Success {
														
 
															+		b.successCount.Add(1)
														
 
															+	}
														
 
															+	if sample.LatencyMs > 0 {
														
 
															+		b.totalLatencyMs.Add(sample.LatencyMs)
														
 
															+	}
														
 
															+	if sample.HasTtft && sample.TtftMs >= 0 {
														
 
															+		b.ttftSumMs.Add(sample.TtftMs)
														
 
															+		b.ttftCount.Add(1)
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func (b *atomicBucket) snapshot() counters {
														
 
															+	return counters{
														
 
															+		requestCount:   b.requestCount.Load(),
														
 
															+		successCount:   b.successCount.Load(),
														
 
															+		totalLatencyMs: b.totalLatencyMs.Load(),
														
 
															+		ttftSumMs:      b.ttftSumMs.Load(),
														
 
															+		ttftCount:      b.ttftCount.Load(),
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func (b *atomicBucket) drain() counters {
														
 
															+	return counters{
														
 
															+		requestCount:   b.requestCount.Swap(0),
														
 
															+		successCount:   b.successCount.Swap(0),
														
 
															+		totalLatencyMs: b.totalLatencyMs.Swap(0),
														
 
															+		ttftSumMs:      b.ttftSumMs.Swap(0),
														
 
															+		ttftCount:      b.ttftCount.Swap(0),
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func (b *atomicBucket) addCounters(c counters) {
														
 
															+	if c.requestCount != 0 {
														
 
															+		b.requestCount.Add(c.requestCount)
														
 
															+	}
														
 
															+	if c.successCount != 0 {
														
 
															+		b.successCount.Add(c.successCount)
														
 
															+	}
														
 
															+	if c.totalLatencyMs != 0 {
														
 
															+		b.totalLatencyMs.Add(c.totalLatencyMs)
														
 
															+	}
														
 
															+	if c.ttftSumMs != 0 {
														
 
															+		b.ttftSumMs.Add(c.ttftSumMs)
														
 
															+	}
														
 
															+	if c.ttftCount != 0 {
														
 
															+		b.ttftCount.Add(c.ttftCount)
														
 
															+	}
														
 
															+}
														
--- a/router/api-router.go
+++ b/router/api-router.go
@@ -31,6 +31,7 @@ func SetApiRouter(router *gin.Engine) {
 
															 		//apiRouter.GET("/midjourney", controller.GetMidjourney)
														
 
															 		apiRouter.GET("/home_page_content", controller.GetHomePageContent)
														
 
															 		apiRouter.GET("/pricing", middleware.TryUserAuth(), controller.GetPricing)
														
 
															+		apiRouter.GET("/perf-metrics", middleware.TryUserAuth(), controller.GetPerfMetrics)
														
 
															 		apiRouter.GET("/verification", middleware.EmailVerificationRateLimit(), middleware.TurnstileCheck(), controller.SendEmailVerification)
														
 
															 		apiRouter.GET("/reset_password", middleware.CriticalRateLimit(), middleware.TurnstileCheck(), controller.SendPasswordResetEmail)
														
 
															 		apiRouter.POST("/user/reset", middleware.CriticalRateLimit(), controller.ResetPassword)
														
--- a/service/quota.go
+++ b/service/quota.go
@@ -14,6 +14,7 @@ import (
 
															 	"github.com/QuantumNous/new-api/logger"
														
 
															 	"github.com/QuantumNous/new-api/model"
														
 
															 	"github.com/QuantumNous/new-api/pkg/billingexpr"
														
 
															+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
														
 
															 	relaycommon "github.com/QuantumNous/new-api/relay/common"
														
 
															 	"github.com/QuantumNous/new-api/setting/ratio_setting"
														
 
															 	"github.com/QuantumNous/new-api/setting/system_setting"
														
@@ -219,7 +220,7 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
 
															 		// in this case, must be some error happened
														
 
															 		// we cannot just return, because we may have to return the pre-consumed quota
														
 
															 		quota = 0
														
 
															-		logContent += fmt.Sprintf("（可能是上游超时）")
														
 
															+		logContent += "（可能是上游超时）"
														
 
															 		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
														
 
															 			"tokenId %d, model %s， pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
														
 
															 	} else {
														
@@ -340,7 +341,7 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, u
 
															 		// in this case, must be some error happened
														
 
															 		// we cannot just return, because we may have to return the pre-consumed quota
														
 
															 		quota = 0
														
 
															-		logContent += fmt.Sprintf("（可能是上游超时）")
														
 
															+		logContent += "（可能是上游超时）"
														
 
															 		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
														
 
															 			"tokenId %d, model %s， pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.OriginModelName, relayInfo.FinalPreConsumedQuota))
														
 
															 	} else {
														
@@ -375,6 +376,9 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, u
 
															 		Group:            relayInfo.UsingGroup,
														
 
															 		Other:            other,
														
 
															 	})
														
 
															+	gopool.Go(func() {
														
 
															+		perfmetrics.RecordRelaySample(relayInfo, true)
														
 
															+	})
														
 
															 }
														
 
															 func PreConsumeTokenQuota(relayInfo *relaycommon.RelayInfo, quota int) error {
														
--- a/service/text_quota.go
+++ b/service/text_quota.go
@@ -11,10 +11,12 @@ import (
 
															 	"github.com/QuantumNous/new-api/logger"
														
 
															 	"github.com/QuantumNous/new-api/model"
														
 
															 	"github.com/QuantumNous/new-api/pkg/billingexpr"
														
 
															+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
														
 
															 	relaycommon "github.com/QuantumNous/new-api/relay/common"
														
 
															 	"github.com/QuantumNous/new-api/setting/operation_setting"
														
 
															 	"github.com/QuantumNous/new-api/types"
														
 
															+	"github.com/bytedance/gopkg/util/gopool"
														
 
															 	"github.com/gin-gonic/gin"
														
 
															 	"github.com/shopspring/decimal"
														
 
															 )
														
@@ -471,4 +473,7 @@ func PostTextConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, us
 
															 		Group:            relayInfo.UsingGroup,
														
 
															 		Other:            other,
														
 
															 	})
														
 
															+	gopool.Go(func() {
														
 
															+		perfmetrics.RecordRelaySample(relayInfo, true)
														
 
															+	})
														
 
															 }
														
--- a/setting/perf_metrics_setting/config.go
+++ b/setting/perf_metrics_setting/config.go
@@ -0,0 +1,45 @@
 
															+package perf_metrics_setting
														
 
															+
														
 
															+import "github.com/QuantumNous/new-api/setting/config"
														
 
															+
														
 
															+type PerfMetricsSetting struct {
														
 
															+	Enabled       bool   `json:"enabled"`
														
 
															+	FlushInterval int    `json:"flush_interval"`
														
 
															+	BucketTime    string `json:"bucket_time"`
														
 
															+	RetentionDays int    `json:"retention_days"`
														
 
															+}
														
 
															+
														
 
															+var perfMetricsSetting = PerfMetricsSetting{
														
 
															+	Enabled:       true,
														
 
															+	FlushInterval: 5,
														
 
															+	BucketTime:    "hour",
														
 
															+	RetentionDays: 0,
														
 
															+}
														
 
															+
														
 
															+func init() {
														
 
															+	config.GlobalConfig.Register("perf_metrics_setting", &perfMetricsSetting)
														
 
															+}
														
 
															+
														
 
															+func GetSetting() PerfMetricsSetting {
														
 
															+	return perfMetricsSetting
														
 
															+}
														
 
															+
														
 
															+func GetBucketSeconds() int64 {
														
 
															+	switch perfMetricsSetting.BucketTime {
														
 
															+	case "minute":
														
 
															+		return 60
														
 
															+	case "5min":
														
 
															+		return 300
														
 
															+	case "hour":
														
 
															+		return 3600
														
 
															+	default:
														
 
															+		return 3600
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func GetFlushIntervalMinutes() int {
														
 
															+	if perfMetricsSetting.FlushInterval < 1 {
														
 
															+		return 1
														
 
															+	}
														
 
															+	return perfMetricsSetting.FlushInterval
														
 
															+}
														
--- a/web/default/src/features/pricing/api.ts
+++ b/web/default/src/features/pricing/api.ts
@@ -10,3 +10,46 @@ export async function getPricing(): Promise<PricingData> {
 
															   const res = await api.get('/api/pricing')
														
 
															   return res.data
														
 
															 }
														
 
															+
														
 
															+export type PerformanceSeriesPoint = {
														
 
															+  ts: number
														
 
															+  avg_ttft_ms: number
														
 
															+  avg_latency_ms: number
														
 
															+  success_rate: number
														
 
															+  count: number
														
 
															+  success_count: number
														
 
															+  ttft_count: number
														
 
															+}
														
 
															+
														
 
															+export type PerformanceGroup = {
														
 
															+  group: string
														
 
															+  avg_ttft_ms: number
														
 
															+  avg_latency_ms: number
														
 
															+  success_rate: number
														
 
															+  request_count: number
														
 
															+  success_count: number
														
 
															+  ttft_count: number
														
 
															+  series: PerformanceSeriesPoint[]
														
 
															+}
														
 
															+
														
 
															+export type PerformanceMetricsData = {
														
 
															+  success: boolean
														
 
															+  message?: string
														
 
															+  data: {
														
 
															+    model_name: string
														
 
															+    series_schema?: string
														
 
															+    groups: PerformanceGroup[]
														
 
															+  }
														
 
															+}
														
 
															+
														
 
															+export async function getPerfMetrics(
														
 
															+  modelName: string,
														
 
															+  hours = 24
														
 
															+): Promise<PerformanceMetricsData> {
														
 
															+  const params = new URLSearchParams({
														
 
															+    model: modelName,
														
 
															+    hours: String(hours),
														
 
															+  })
														
 
															+  const res = await api.get(`/api/perf-metrics?${params.toString()}`)
														
 
															+  return res.data
														
 
															+}
														
--- a/web/default/src/features/pricing/components/model-details-charts.tsx
+++ b/web/default/src/features/pricing/components/model-details-charts.tsx
@@ -14,6 +14,13 @@ function formatHourLabel(iso: string): string {
 
															 function formatDayLabel(date: string): string {
														
 
															   const parsed = new Date(date)
														
 
															+  if (date.includes('T')) {
														
 
															+    return parsed.toLocaleString(undefined, {
														
 
															+      month: 'short',
														
 
															+      day: 'numeric',
														
 
															+      hour: '2-digit',
														
 
															+    })
														
 
															+  }
														
 
															   return parsed.toLocaleDateString(undefined, {
														
 
															     month: 'short',
														
 
															     day: 'numeric',
														
--- a/web/default/src/features/pricing/components/model-details-performance.tsx
+++ b/web/default/src/features/pricing/components/model-details-performance.tsx
@@ -1,8 +1,8 @@
 
															 import { useMemo } from 'react'
														
 
															+import { useQuery } from '@tanstack/react-query'
														
 
															 import {
														
 
															   Activity,
														
 
															   AlertTriangle,
														
 
															-  Gauge,
														
 
															   HeartPulse,
														
 
															   Timer,
														
 
															   TrendingUp,
														
@@ -18,22 +18,14 @@ import {
 
															   TableRow,
														
 
															 } from '@/components/ui/table'
														
 
															 import { GroupBadge } from '@/components/group-badge'
														
 
															+import { getPerfMetrics, type PerformanceGroup } from '../api'
														
 
															 import {
														
 
															-  aggregateUptime,
														
 
															-  buildGroupPerformance,
														
 
															-  buildLatencyTimeSeries,
														
 
															-  buildUptimeSeries,
														
 
															   formatLatency,
														
 
															-  formatThroughput,
														
 
															   formatUptimePct,
														
 
															   type UptimeDayPoint,
														
 
															 } from '../lib/mock-stats'
														
 
															 import type { PricingModel } from '../types'
														
 
															-import {
														
 
															-  LatencyTrendChart,
														
 
															-  ThroughputBarChart,
														
 
															-  UptimeBarChart,
														
 
															-} from './model-details-charts'
														
 
															+import { LatencyTrendChart, UptimeBarChart } from './model-details-charts'
														
 
															 import { UptimeSparkline } from './model-details-uptime-sparkline'
														
 
															 const COMPACT_NUMBER = new Intl.NumberFormat(undefined, {
														
@@ -74,33 +66,102 @@ function StatCard(props: {
 
															   )
														
 
															 }
														
 
															+type PerformanceRow = {
														
 
															+  group: string
														
 
															+  avg_ttft_ms: number
														
 
															+  avg_latency_ms: number
														
 
															+  success_rate: number
														
 
															+  request_count: number
														
 
															+}
														
 
															+
														
 
															+function toLatencySeries(groups: PerformanceGroup[]) {
														
 
															+  return groups.flatMap((group) =>
														
 
															+    group.series
														
 
															+      .filter((point) => point.ttft_count > 0 && point.avg_ttft_ms > 0)
														
 
															+      .map((point) => ({
														
 
															+        timestamp: new Date(point.ts * 1000).toISOString(),
														
 
															+        group: group.group,
														
 
															+        ttft_ms: point.avg_ttft_ms,
														
 
															+      }))
														
 
															+  )
														
 
															+}
														
 
															+
														
 
															+function toUptimeSeries(groups: PerformanceGroup[]): UptimeDayPoint[] {
														
 
															+  const byTs = new Map<number, { count: number; success: number }>()
														
 
															+  for (const group of groups) {
														
 
															+    for (const point of group.series) {
														
 
															+      const current = byTs.get(point.ts) ?? { count: 0, success: 0 }
														
 
															+      current.count += point.count
														
 
															+      current.success += point.success_count
														
 
															+      byTs.set(point.ts, current)
														
 
															+    }
														
 
															+  }
														
 
															+  return Array.from(byTs.entries())
														
 
															+    .sort(([a], [b]) => a - b)
														
 
															+    .map(([ts, value]) => {
														
 
															+      const uptime = value.count > 0 ? (value.success / value.count) * 100 : 0
														
 
															+      return {
														
 
															+        date: new Date(ts * 1000).toISOString(),
														
 
															+        uptime_pct: Math.round(uptime * 100) / 100,
														
 
															+        incidents: value.success < value.count ? 1 : 0,
														
 
															+        outage_minutes: 0,
														
 
															+      }
														
 
															+    })
														
 
															+}
														
 
															+
														
 
															+function toGroupUptimeSeries(group: PerformanceGroup): UptimeDayPoint[] {
														
 
															+  return group.series.map((point) => ({
														
 
															+    date: new Date(point.ts * 1000).toISOString(),
														
 
															+    uptime_pct: Math.round(point.success_rate * 100) / 100,
														
 
															+    incidents: point.success_count < point.count ? 1 : 0,
														
 
															+    outage_minutes: 0,
														
 
															+  }))
														
 
															+}
														
 
															+
														
 
															+function weightedAverage(
														
 
															+  rows: PerformanceRow[],
														
 
															+  field: 'avg_ttft_ms' | 'avg_latency_ms'
														
 
															+): number {
														
 
															+  let total = 0
														
 
															+  let count = 0
														
 
															+  for (const row of rows) {
														
 
															+    if (row[field] <= 0 || row.request_count <= 0) continue
														
 
															+    total += row[field] * row.request_count
														
 
															+    count += row.request_count
														
 
															+  }
														
 
															+  return count > 0 ? Math.round(total / count) : 0
														
 
															+}
														
 
															+
														
 
															 export function ModelDetailsPerformance(props: { model: PricingModel }) {
														
 
															   const { t } = useTranslation()
														
 
															-  const performances = useMemo(
														
 
															-    () => buildGroupPerformance(props.model),
														
 
															-    [props.model]
														
 
															-  )
														
 
															-  const latencySeries = useMemo(
														
 
															-    () => buildLatencyTimeSeries(props.model),
														
 
															-    [props.model]
														
 
															-  )
														
 
															-  const uptimeSeries = useMemo(
														
 
															-    () => buildUptimeSeries(props.model),
														
 
															-    [props.model]
														
 
															-  )
														
 
															-  const aggregated = useMemo(
														
 
															-    () => aggregateUptime(uptimeSeries),
														
 
															-    [uptimeSeries]
														
 
															+  const metricsQuery = useQuery({
														
 
															+    queryKey: ['perf-metrics', props.model.model_name],
														
 
															+    queryFn: () => getPerfMetrics(props.model.model_name, 24),
														
 
															+    staleTime: 60 * 1000,
														
 
															+  })
														
 
															+  const groups = metricsQuery.data?.data.groups ?? []
														
 
															+  const performances = useMemo<PerformanceRow[]>(
														
 
															+    () =>
														
 
															+      groups.map((group) => ({
														
 
															+        group: group.group,
														
 
															+        avg_ttft_ms: group.avg_ttft_ms,
														
 
															+        avg_latency_ms: group.avg_latency_ms,
														
 
															+        success_rate: group.success_rate,
														
 
															+        request_count: group.request_count,
														
 
															+      })),
														
 
															+    [groups]
														
 
															   )
														
 
															+  const latencySeries = useMemo(() => toLatencySeries(groups), [groups])
														
 
															+  const uptimeSeries = useMemo(() => toUptimeSeries(groups), [groups])
														
 
															   const uptimeByGroup = useMemo<Record<string, UptimeDayPoint[]>>(() => {
														
 
															     const map: Record<string, UptimeDayPoint[]> = {}
														
 
															-    for (const perf of performances) {
														
 
															-      map[perf.group] = buildUptimeSeries(props.model, perf.group)
														
 
															+    for (const group of groups) {
														
 
															+      map[group.group] = toGroupUptimeSeries(group)
														
 
															     }
														
 
															     return map
														
 
															-  }, [performances, props.model])
														
 
															+  }, [groups])
														
 
															-  if (performances.length === 0) {
														
 
															+  if (metricsQuery.isLoading || performances.length === 0) {
														
 
															     return (
														
 
															       <div className='text-muted-foreground rounded-lg border p-6 text-center text-sm'>
														
 
															         {t('Performance data is not yet available for this model.')}
														
@@ -108,18 +169,22 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
 
															     )
														
 
															   }
														
 
															-  const bestTtft = Math.min(...performances.map((p) => p.ttft_p50_ms))
														
 
															-  const bestThroughput = Math.max(...performances.map((p) => p.throughput_tps))
														
 
															-  const totalRequests = performances.reduce(
														
 
															-    (s, p) => s + p.request_volume_24h,
														
 
															-    0
														
 
															-  )
														
 
															-  const intent =
														
 
															-    aggregated.uptime_pct >= 99.9
														
 
															-      ? 'success'
														
 
															-      : aggregated.uptime_pct >= 99
														
 
															-        ? 'default'
														
 
															-        : 'warning'
														
 
															+  const ttftValues = performances
														
 
															+    .map((p) => p.avg_ttft_ms)
														
 
															+    .filter((value) => value > 0)
														
 
															+  const bestTtft = ttftValues.length > 0 ? Math.min(...ttftValues) : 0
														
 
															+  const avgLatency = weightedAverage(performances, 'avg_latency_ms')
														
 
															+  const totalRequests = performances.reduce((s, p) => s + p.request_count, 0)
														
 
															+  const totalSuccess = groups.reduce((s, p) => s + p.success_count, 0)
														
 
															+  const successRate =
														
 
															+    totalRequests > 0 ? (totalSuccess / totalRequests) * 100 : 0
														
 
															+  const incidentCount = uptimeSeries.reduce((s, p) => s + p.incidents, 0)
														
 
															+  let intent: 'default' | 'warning' | 'success' = 'warning'
														
 
															+  if (successRate >= 99.9) {
														
 
															+    intent = 'success'
														
 
															+  } else if (successRate >= 99) {
														
 
															+    intent = 'default'
														
 
															+  }
														
 
															   const headerCellClass =
														
 
															     'text-muted-foreground py-2 text-[10px] font-medium tracking-wider uppercase'
														
@@ -134,21 +199,21 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
 
															           hint={t('Lowest median first-token latency')}
														
 
															         />
														
 
															         <StatCard
														
 
															-          icon={Gauge}
														
 
															-          label={t('Peak throughput')}
														
 
															-          value={formatThroughput(bestThroughput)}
														
 
															+          icon={Timer}
														
 
															+          label={t('Average latency')}
														
 
															+          value={formatLatency(avgLatency)}
														
 
															           hint={t('Across all groups')}
														
 
															         />
														
 
															         <StatCard
														
 
															           icon={HeartPulse}
														
 
															-          label={t('Uptime (30d)')}
														
 
															-          value={formatUptimePct(aggregated.uptime_pct)}
														
 
															+          label={t('Success rate')}
														
 
															+          value={formatUptimePct(successRate)}
														
 
															           hint={
														
 
															-            aggregated.incidents > 0
														
 
															-              ? t('{{count}} incidents in the last 30 days', {
														
 
															-                  count: aggregated.incidents,
														
 
															+            incidentCount > 0
														
 
															+              ? t('{{count}} incidents in the last 24 hours', {
														
 
															+                  count: incidentCount,
														
 
															                 })
														
 
															-              : t('No incidents in the last 30 days')
														
 
															+              : t('No incidents in the last 24 hours')
														
 
															           }
														
 
															           intent={intent}
														
 
															         />
														
@@ -164,9 +229,7 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
 
															         <SectionHeader
														
 
															           icon={Activity}
														
 
															           title={t('Per-group performance')}
														
 
															-          description={t(
														
 
															-            'TTFT percentiles, throughput, and 30-day uptime by group'
														
 
															-          )}
														
 
															+          description={t('Average latency, TTFT, and success rate by group')}
														
 
															         />
														
 
															         <div className='overflow-x-auto rounded-lg border'>
														
 
															           <Table className='text-sm'>
														
@@ -174,31 +237,24 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
 
															               <TableRow className='hover:bg-transparent'>
														
 
															                 <TableHead className={headerCellClass}>{t('Group')}</TableHead>
														
 
															                 <TableHead className={`${headerCellClass} text-right`}>
														
 
															-                  {t('TTFT P50')}
														
 
															-                </TableHead>
														
 
															-                <TableHead className={`${headerCellClass} text-right`}>
														
 
															-                  {t('TTFT P95')}
														
 
															-                </TableHead>
														
 
															-                <TableHead className={`${headerCellClass} text-right`}>
														
 
															-                  {t('TTFT P99')}
														
 
															+                  {t('Average TTFT')}
														
 
															                 </TableHead>
														
 
															                 <TableHead className={`${headerCellClass} text-right`}>
														
 
															-                  {t('Throughput')}
														
 
															+                  {t('Average latency')}
														
 
															                 </TableHead>
														
 
															                 <TableHead
														
 
															                   className={`${headerCellClass} min-w-[160px] text-left`}
														
 
															                 >
														
 
															-                  {t('Uptime (30d)')}
														
 
															+                  {t('Success rate')}
														
 
															                 </TableHead>
														
 
															                 <TableHead className={`${headerCellClass} text-right`}>
														
 
															-                  {t('Requests / 24h')}
														
 
															+                  {t('Request Count')}
														
 
															                 </TableHead>
														
 
															               </TableRow>
														
 
															             </TableHeader>
														
 
															             <TableBody>
														
 
															               {performances.map((perf) => {
														
 
															-                const isBestTtft = perf.ttft_p50_ms === bestTtft
														
 
															-                const isBestTput = perf.throughput_tps === bestThroughput
														
 
															+                const isBestTtft = perf.avg_ttft_ms === bestTtft
														
 
															                 return (
														
 
															                   <TableRow key={perf.group}>
														
 
															                     <TableCell className='py-2.5'>
														
@@ -210,23 +266,10 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
 
															                         isBestTtft && 'text-emerald-600 dark:text-emerald-400'
														
 
															                       )}
														
 
															                     >
														
 
															-                      {formatLatency(perf.ttft_p50_ms)}
														
 
															-                    </TableCell>
														
 
															-                    <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
														
 
															-                      {formatLatency(perf.ttft_p95_ms)}
														
 
															+                      {formatLatency(perf.avg_ttft_ms)}
														
 
															                     </TableCell>
														
 
															                     <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
														
 
															-                      {formatLatency(perf.ttft_p99_ms)}
														
 
															-                    </TableCell>
														
 
															-                    <TableCell
														
 
															-                      className={cn(
														
 
															-                        'py-2.5 text-right font-mono',
														
 
															-                        isBestTput &&
														
 
															-                          perf.throughput_tps > 0 &&
														
 
															-                          'text-emerald-600 dark:text-emerald-400'
														
 
															-                      )}
														
 
															-                    >
														
 
															-                      {formatThroughput(perf.throughput_tps)}
														
 
															+                      {formatLatency(perf.avg_latency_ms)}
														
 
															                     </TableCell>
														
 
															                     <TableCell className='py-2.5'>
														
 
															                       <UptimeSparkline
														
@@ -235,7 +278,7 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
 
															                       />
														
 
															                     </TableCell>
														
 
															                     <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
														
 
															-                      {COMPACT_NUMBER.format(perf.request_volume_24h)}
														
 
															+                      {COMPACT_NUMBER.format(perf.request_count)}
														
 
															                     </TableCell>
														
 
															                   </TableRow>
														
 
															                 )
														
@@ -249,45 +292,31 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
 
															         <SectionHeader
														
 
															           icon={Timer}
														
 
															           title={t('Latency trend (last 24h)')}
														
 
															-          description={t(
														
 
															-            'Median time-to-first-token (TTFT) sampled hourly per group'
														
 
															-          )}
														
 
															+          description={t('Average time-to-first-token (TTFT) by group')}
														
 
															         />
														
 
															         <LatencyTrendChart series={latencySeries} />
														
 
															       </section>
														
 
															-      {bestThroughput > 0 && (
														
 
															-        <section>
														
 
															-          <SectionHeader
														
 
															-            icon={Gauge}
														
 
															-            title={t('Throughput by group')}
														
 
															-            description={t('Average tokens per second sustained per group')}
														
 
															-          />
														
 
															-          <ThroughputBarChart rows={performances} />
														
 
															-        </section>
														
 
															-      )}
														
 
															-
														
 
															       <section>
														
 
															         <SectionHeader
														
 
															           icon={HeartPulse}
														
 
															-          title={t('Uptime (last 30 days)')}
														
 
															+          title={t('Availability (last 24h)')}
														
 
															           description={
														
 
															-            aggregated.incidents > 0
														
 
															+            incidentCount > 0
														
 
															               ? t(
														
 
															-                  'Daily uptime; {{incidents}} incidents totalling {{minutes}} minutes',
														
 
															+                  'Request success rate; {{incidents}} incident buckets in the last 24 hours',
														
 
															                   {
														
 
															-                    incidents: aggregated.incidents,
														
 
															-                    minutes: aggregated.outage_minutes,
														
 
															+                    incidents: incidentCount,
														
 
															                   }
														
 
															                 )
														
 
															-              : t('Daily uptime over the last 30 days')
														
 
															+              : t('Request success rate sampled over the last 24 hours')
														
 
															           }
														
 
															           accent={
														
 
															-            aggregated.incidents > 0 ? (
														
 
															+            incidentCount > 0 ? (
														
 
															               <span className='inline-flex items-center gap-1 text-amber-600 dark:text-amber-400'>
														
 
															                 <AlertTriangle className='size-3.5' />
														
 
															                 {t('{{count}} incidents', {
														
 
															-                  count: aggregated.incidents,
														
 
															+                  count: incidentCount,
														
 
															                 })}
														
 
															               </span>
														
 
															             ) : null
														
@@ -295,12 +324,6 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
 
															         />
														
 
															         <UptimeBarChart series={uptimeSeries} />
														
 
															       </section>
														
 
															-
														
 
															-      <p className='text-muted-foreground/60 text-[11px] leading-relaxed'>
														
 
															-        {t(
														
 
															-          'Performance metrics shown here are simulated for preview purposes and will be replaced with live observability data once the backend integration is complete.'
														
 
															-        )}
														
 
															-      </p>
														
 
															     </div>
														
 
															   )
														
 
															 }
														
--- a/web/default/src/features/pricing/components/model-details.tsx
+++ b/web/default/src/features/pricing/components/model-details.tsx
@@ -41,7 +41,6 @@ import {
 
															   isDynamicPricingModel,
														
 
															 } from '../lib/dynamic-price'
														
 
															 import { parseTags } from '../lib/filters'
														
 
															-import { buildUptimeSeries } from '../lib/mock-stats'
														
 
															 import {
														
 
															   getAvailableGroups,
														
 
															   isTokenBasedModel,
														
@@ -57,7 +56,6 @@ import { ModelDetailsCapabilities } from './model-details-capabilities'
 
															 import { ModalitiesMatrix } from './model-details-modalities'
														
 
															 import { ModelDetailsPerformance } from './model-details-performance'
														
 
															 import { ModelDetailsQuickStats } from './model-details-quick-stats'
														
 
															-import { UptimeStatusRow } from './model-details-uptime-sparkline'
														
 
															 // ----------------------------------------------------------------------------
														
 
															 // Local UI helpers
														
@@ -782,10 +780,6 @@ export function ModelDetailsContent(props: ModelDetailsContentProps) {
 
															   const { t } = useTranslation()
														
 
															   const showRechargePrice = props.showRechargePrice ?? false
														
 
															   const metadata = useMemo(() => inferModelMetadata(props.model), [props.model])
														
 
															-  const uptimeSeries = useMemo(
														
 
															-    () => buildUptimeSeries(props.model),
														
 
															-    [props.model]
														
 
															-  )
														
 
															   const isDynamic =
														
 
															     props.model.billing_mode === 'tiered_expr' &&
														
@@ -797,8 +791,6 @@ export function ModelDetailsContent(props: ModelDetailsContentProps) {
 
															       <ModelDetailsQuickStats metadata={metadata} />
														
 
															-      <UptimeStatusRow series={uptimeSeries} />
														
 
															-
														
 
															       <Tabs defaultValue='overview' className='gap-4'>
														
 
															         <TabsList className='bg-muted/60 h-auto w-full justify-start gap-1 overflow-x-auto rounded-lg p-1'>
														
 
															           {TAB_VALUES.map((value) => {
														
--- a/web/default/src/features/system-settings/maintenance/config.ts
+++ b/web/default/src/features/system-settings/maintenance/config.ts
@@ -75,6 +75,10 @@ export const DEFAULT_MAINTENANCE_SETTINGS: MaintenanceSettings = {
 
															   'performance_setting.monitor_cpu_threshold': 90,
														
 
															   'performance_setting.monitor_memory_threshold': 90,
														
 
															   'performance_setting.monitor_disk_threshold': 95,
														
 
															+  'perf_metrics_setting.enabled': true,
														
 
															+  'perf_metrics_setting.flush_interval': 5,
														
 
															+  'perf_metrics_setting.bucket_time': 'hour',
														
 
															+  'perf_metrics_setting.retention_days': 0,
														
 
															 }
														
 
															 const toBoolean = (value: unknown, fallback: boolean): boolean => {
														
--- a/web/default/src/features/system-settings/maintenance/performance-section.tsx
+++ b/web/default/src/features/system-settings/maintenance/performance-section.tsx
@@ -59,6 +59,10 @@ const perfSchema = z.object({
 
															     .number()
														
 
															     .min(0)
														
 
															     .max(100),
														
 
															+  'perf_metrics_setting.enabled': z.boolean(),
														
 
															+  'perf_metrics_setting.flush_interval': z.coerce.number().min(1),
														
 
															+  'perf_metrics_setting.bucket_time': z.enum(['minute', '5min', 'hour']),
														
 
															+  'perf_metrics_setting.retention_days': z.coerce.number().min(0),
														
 
															 })
														
 
															 type PerfFormValues = z.infer<typeof perfSchema>
														
@@ -248,6 +252,7 @@ export function PerformanceSection(props: Props) {
 
															   const diskEnabled = form.watch('performance_setting.disk_cache_enabled')
														
 
															   const monitorEnabled = form.watch('performance_setting.monitor_enabled')
														
 
															+  const perfMetricsEnabled = form.watch('perf_metrics_setting.enabled')
														
 
															   const maxCacheSizeMb = form.watch(
														
 
															     'performance_setting.disk_cache_max_size_mb'
														
 
															   )
														
@@ -452,6 +457,97 @@ export function PerformanceSection(props: Props) {
 
															             />
														
 
															           </div>
														
 
															+          <Separator />
														
 
															+
														
 
															+          <div>
														
 
															+            <h4 className='font-medium'>{t('Model performance metrics')}</h4>
														
 
															+            <p className='text-muted-foreground mt-1 text-xs'>
														
 
															+              {t(
														
 
															+                'Collect relay latency and success-rate metrics for the model square.'
														
 
															+              )}
														
 
															+            </p>
														
 
															+          </div>
														
 
															+
														
 
															+          <div className='grid grid-cols-1 gap-4 md:grid-cols-4'>
														
 
															+            <FormField
														
 
															+              control={form.control}
														
 
															+              name='perf_metrics_setting.enabled'
														
 
															+              render={({ field }) => (
														
 
															+                <FormItem className='flex items-center gap-2'>
														
 
															+                  <FormControl>
														
 
															+                    <Switch
														
 
															+                      checked={field.value}
														
 
															+                      onCheckedChange={field.onChange}
														
 
															+                    />
														
 
															+                  </FormControl>
														
 
															+                  <FormLabel>{t('Enable model performance metrics')}</FormLabel>
														
 
															+                </FormItem>
														
 
															+              )}
														
 
															+            />
														
 
															+            <FormField
														
 
															+              control={form.control}
														
 
															+              name='perf_metrics_setting.flush_interval'
														
 
															+              render={({ field }) => (
														
 
															+                <FormItem>
														
 
															+                  <FormLabel>{t('Flush interval (minutes)')}</FormLabel>
														
 
															+                  <FormControl>
														
 
															+                    <Input
														
 
															+                      type='number'
														
 
															+                      min={1}
														
 
															+                      {...field}
														
 
															+                      disabled={!perfMetricsEnabled}
														
 
															+                    />
														
 
															+                  </FormControl>
														
 
															+                </FormItem>
														
 
															+              )}
														
 
															+            />
														
 
															+            <FormField
														
 
															+              control={form.control}
														
 
															+              name='perf_metrics_setting.bucket_time'
														
 
															+              render={({ field }) => (
														
 
															+                <FormItem>
														
 
															+                  <FormLabel>{t('Aggregation bucket')}</FormLabel>
														
 
															+                  <Select
														
 
															+                    value={field.value}
														
 
															+                    onValueChange={field.onChange}
														
 
															+                    disabled={!perfMetricsEnabled}
														
 
															+                  >
														
 
															+                    <FormControl>
														
 
															+                      <SelectTrigger>
														
 
															+                        <SelectValue />
														
 
															+                      </SelectTrigger>
														
 
															+                    </FormControl>
														
 
															+                    <SelectContent>
														
 
															+                      <SelectItem value='minute'>{t('1 minute')}</SelectItem>
														
 
															+                      <SelectItem value='5min'>{t('5 minutes')}</SelectItem>
														
 
															+                      <SelectItem value='hour'>{t('1 hour')}</SelectItem>
														
 
															+                    </SelectContent>
														
 
															+                  </Select>
														
 
															+                </FormItem>
														
 
															+              )}
														
 
															+            />
														
 
															+            <FormField
														
 
															+              control={form.control}
														
 
															+              name='perf_metrics_setting.retention_days'
														
 
															+              render={({ field }) => (
														
 
															+                <FormItem>
														
 
															+                  <FormLabel>{t('Retention days')}</FormLabel>
														
 
															+                  <FormControl>
														
 
															+                    <Input
														
 
															+                      type='number'
														
 
															+                      min={0}
														
 
															+                      {...field}
														
 
															+                      disabled={!perfMetricsEnabled}
														
 
															+                    />
														
 
															+                  </FormControl>
														
 
															+                  <FormDescription>
														
 
															+                    {t('0 means data is kept permanently')}
														
 
															+                  </FormDescription>
														
 
															+                </FormItem>
														
 
															+              )}
														
 
															+            />
														
 
															+          </div>
														
 
															+
														
 
															           <Button type='submit' disabled={updateOption.isPending}>
														
 
															             {updateOption.isPending ? t('Saving...') : t('Save Changes')}
														
 
															           </Button>
														
--- a/web/default/src/features/system-settings/maintenance/section-registry.tsx
+++ b/web/default/src/features/system-settings/maintenance/section-registry.tsx
@@ -102,6 +102,14 @@ const MAINTENANCE_SECTIONS = [
 
															             settings['performance_setting.monitor_memory_threshold'] ?? 90,
														
 
															           'performance_setting.monitor_disk_threshold':
														
 
															             settings['performance_setting.monitor_disk_threshold'] ?? 95,
														
 
															+          'perf_metrics_setting.enabled':
														
 
															+            settings['perf_metrics_setting.enabled'] ?? true,
														
 
															+          'perf_metrics_setting.flush_interval':
														
 
															+            settings['perf_metrics_setting.flush_interval'] ?? 5,
														
 
															+          'perf_metrics_setting.bucket_time':
														
 
															+            settings['perf_metrics_setting.bucket_time'] ?? 'hour',
														
 
															+          'perf_metrics_setting.retention_days':
														
 
															+            settings['perf_metrics_setting.retention_days'] ?? 0,
														
 
															         }}
														
 
															       />
														
 
															     ),
														
--- a/web/default/src/features/system-settings/types.ts
+++ b/web/default/src/features/system-settings/types.ts
@@ -254,6 +254,10 @@ export type MaintenanceSettings = {
 
															   'performance_setting.monitor_cpu_threshold': number
														
 
															   'performance_setting.monitor_memory_threshold': number
														
 
															   'performance_setting.monitor_disk_threshold': number
														
 
															+  'perf_metrics_setting.enabled': boolean
														
 
															+  'perf_metrics_setting.flush_interval': number
														
 
															+  'perf_metrics_setting.bucket_time': 'hour' | 'minute' | '5min'
														
 
															+  'perf_metrics_setting.retention_days': number
														
 
															 }
														
 
															 export type RequestLimitsSettings = {
														
--- a/web/default/src/i18n/locales/en.json
+++ b/web/default/src/i18n/locales/en.json
@@ -29,6 +29,7 @@
 
															     "{{count}} disabled channel(s) deleted": "{{count}} disabled channel(s) deleted",
														
 
															     "{{count}} hours ago": "{{count}} hours ago",
														
 
															     "{{count}} incidents": "{{count}} incidents",
														
 
															+    "{{count}} incidents in the last 24 hours": "{{count}} incidents in the last 24 hours",
														
 
															     "{{count}} incidents in the last 30 days": "{{count}} incidents in the last 30 days",
														
 
															     "{{count}} IP(s)": "{{count}} IP(s)",
														
 
															     "{{count}} log entries removed.": "{{count}} log entries removed.",
														
@@ -59,11 +60,14 @@
 
															     "© 2025 Your Company. All rights reserved.": "© 2025 Your Company. All rights reserved.",
														
 
															     "+{{count}} more": "+{{count}} more",
														
 
															     "| Based on": "| Based on",
														
 
															+    "0 means data is kept permanently": "0 means data is kept permanently",
														
 
															     "0 means unlimited": "0 means unlimited",
														
 
															     "1 Day": "1 Day",
														
 
															     "1 day ago": "1 day ago",
														
 
															+    "1 hour": "1 hour",
														
 
															     "1 Hour": "1H",
														
 
															     "1 hour ago": "1 hour ago",
														
 
															+    "1 minute": "1 minute",
														
 
															     "1 minute ago": "1 minute ago",
														
 
															     "1 Month": "1M",
														
 
															     "1 month ago": "1 month ago",
														
@@ -86,6 +90,7 @@
 
															     "30 Days": "30 Days",
														
 
															     "30 days ago": "30 days ago",
														
 
															     "30d change": "30d change",
														
 
															+    "5 minutes": "5 minutes",
														
 
															     "5-Hour Window": "5-Hour Window",
														
 
															     "50 / page": "50 / page",
														
 
															     "7 Days": "7 Days",
														
@@ -218,6 +223,7 @@
 
															     "Aggregated traffic by upstream model provider": "Aggregated traffic by upstream model provider",
														
 
															     "Aggregated usage metrics and trend charts.": "Aggregated usage metrics and trend charts.",
														
 
															     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.",
														
 
															+    "Aggregation bucket": "Aggregation bucket",
														
 
															     "AGPL v3.0 License": "AGPL v3.0 License",
														
 
															     "AI model testing environment": "AI model testing environment",
														
 
															     "AI models": "AI models",
														
@@ -423,13 +429,18 @@
 
															     "Automatically selects the best available group with circuit breaker mechanism": "Automatically selects the best available group with circuit breaker mechanism",
														
 
															     "Automatically sync model list when upstream changes are detected": "Automatically sync model list when upstream changes are detected",
														
 
															     "Automatically test channels and notify users when limits are hit": "Automatically test channels and notify users when limits are hit",
														
 
															+    "Availability (last 24h)": "Availability (last 24h)",
														
 
															     "Available": "Available",
														
 
															     "Available disk space": "Available disk space",
														
 
															     "Available Models": "Available Models",
														
 
															     "Available Rewards": "Available Rewards",
														
 
															+    "Average latency": "Average latency",
														
 
															+    "Average latency, TTFT, and success rate by group": "Average latency, TTFT, and success rate by group",
														
 
															     "Average RPM": "Average RPM",
														
 
															+    "Average time-to-first-token (TTFT) by group": "Average time-to-first-token (TTFT) by group",
														
 
															     "Average tokens per second sustained per group": "Average tokens per second sustained per group",
														
 
															     "Average TPM": "Average TPM",
														
 
															+    "Average TTFT": "Average TTFT",
														
 
															     "AWS": "AWS",
														
 
															     "AWS Bedrock Claude Compat": "AWS Bedrock Claude Compat",
														
 
															     "AWS Key Format": "AWS Key Format",
														
@@ -710,6 +721,7 @@
 
															     "Cohere": "Cohere",
														
 
															     "Collapse": "Collapse",
														
 
															     "Collapse All": "Collapse All",
														
 
															+    "Collect relay latency and success-rate metrics for the model square.": "Collect relay latency and success-rate metrics for the model square.",
														
 
															     "Color": "Color",
														
 
															     "Color is required": "Color is required",
														
 
															     "Color preset": "Color preset",
														
@@ -1294,6 +1306,7 @@
 
															     "Enable io.net deployments": "Enable io.net deployments",
														
 
															     "Enable io.net model deployment service in console": "Enable io.net model deployment service in console",
														
 
															     "Enable LinuxDO OAuth": "Enable LinuxDO OAuth",
														
 
															+    "Enable model performance metrics": "Enable model performance metrics",
														
 
															     "Enable OIDC": "Enable OIDC",
														
 
															     "Enable or disable this channel": "Enable or disable this channel",
														
 
															     "Enable or disable this model": "Enable or disable this model",
														
@@ -1659,6 +1672,7 @@
 
															     "Fixed price (USD)": "Fixed price (USD)",
														
 
															     "Floating": "Floating",
														
 
															     "FluentRead extension not detected. Please ensure it is installed and active.": "FluentRead extension not detected. Please ensure it is installed and active.",
														
 
															+    "Flush interval (minutes)": "Flush interval (minutes)",
														
 
															     "Follow the guided steps to prepare your workspace before the first login.": "Follow the guided steps to prepare your workspace before the first login.",
														
 
															     "Footer": "Footer",
														
 
															     "Footer text displayed at the bottom of pages": "Footer text displayed at the bottom of pages",
														
@@ -2221,6 +2235,7 @@
 
															     "Model name is required": "Model name is required",
														
 
															     "Model names copied to clipboard": "Model names copied to clipboard",
														
 
															     "Model not found": "Model not found",
														
 
															+    "Model performance metrics": "Model performance metrics",
														
 
															     "Model Price": "Model Price",
														
 
															     "Model Price Not Configured": "Model Price Not Configured",
														
 
															     "Model Pricing": "Model Pricing",
														
@@ -2396,6 +2411,7 @@
 
															     "No groups match your search": "No groups match your search",
														
 
															     "No header overrides configured.": "No header overrides configured.",
														
 
															     "No history data available": "No history data available",
														
 
															+    "No incidents in the last 24 hours": "No incidents in the last 24 hours",
														
 
															     "No incidents in the last 30 days": "No incidents in the last 30 days",
														
 
															     "No Inviter": "No Inviter",
														
 
															     "No keys found": "No keys found",
														
@@ -3106,6 +3122,8 @@
 
															     "Request Model:": "Request Model:",
														
 
															     "Request overrides, routing behavior, and upstream model automation": "Request overrides, routing behavior, and upstream model automation",
														
 
															     "Request rule pricing": "Request rule pricing",
														
 
															+    "Request success rate sampled over the last 24 hours": "Request success rate sampled over the last 24 hours",
														
 
															+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Request success rate; {{incidents}} incident buckets in the last 24 hours",
														
 
															     "Request timed out, please refresh and restart GitHub login": "Request timed out, please refresh and restart GitHub login",
														
 
															     "Request-based": "Request-based",
														
 
															     "Requests (24h)": "Requests (24h)",
														
@@ -3154,6 +3172,7 @@
 
															     "Restrict user model request frequency (may impact high concurrency performance)": "Restrict user model request frequency (may impact high concurrency performance)",
														
 
															     "Retain last N days": "Retain last N days",
														
 
															     "Retain last N files": "Retain last N files",
														
 
															+    "Retention days": "Retention days",
														
 
															     "Retry": "Retry",
														
 
															     "Retry Chain": "Retry Chain",
														
 
															     "Retry Suggestion": "Retry Suggestion",
														
@@ -3527,6 +3546,7 @@
 
															     "Subscription Plans": "Subscription Plans",
														
 
															     "Subtract": "Subtract",
														
 
															     "Success": "Success",
														
 
															+    "Success rate": "Success rate",
														
 
															     "Successfully created {{count}} API Key(s)": "Successfully created {{count}} API Key(s)",
														
 
															     "Successfully created {{count}} redemption codes": "Successfully created {{count}} redemption codes",
														
 
															     "Successfully deleted {{count}} API key(s)": "Successfully deleted {{count}} API key(s)",
														
--- a/web/default/src/i18n/locales/fr.json
+++ b/web/default/src/i18n/locales/fr.json
@@ -29,6 +29,7 @@
 
															     "{{count}} disabled channel(s) deleted": "{{count}} canal(canaux) désactivé(s) supprimé(s)",
														
 
															     "{{count}} hours ago": "il y a {{count}} heures",
														
 
															     "{{count}} incidents": "{{count}} incidents",
														
 
															+    "{{count}} incidents in the last 24 hours": "{{count}} incidents au cours des 24 dernières heures",
														
 
															     "{{count}} incidents in the last 30 days": "{{count}} incidents au cours des 30 derniers jours",
														
 
															     "{{count}} IP(s)": "{{count}} IP",
														
 
															     "{{count}} log entries removed.": "{{count}} entrées de journal supprimées.",
														
@@ -59,11 +60,14 @@
 
															     "© 2025 Your Company. All rights reserved.": "© 2025 Votre entreprise. Tous droits réservés.",
														
 
															     "+{{count}} more": "+{{count}} de plus",
														
 
															     "| Based on": "| Basé sur",
														
 
															+    "0 means data is kept permanently": "0 signifie que les données sont conservées indéfiniment",
														
 
															     "0 means unlimited": "0 signifie illimité",
														
 
															     "1 Day": "1 jour",
														
 
															     "1 day ago": "Il y a 1 jour",
														
 
															+    "1 hour": "1 heure",
														
 
															     "1 Hour": "1H",
														
 
															     "1 hour ago": "Il y a 1 heure",
														
 
															+    "1 minute": "1 minute",
														
 
															     "1 minute ago": "Il y a 1 minute",
														
 
															     "1 Month": "1M",
														
 
															     "1 month ago": "Il y a 1 mois",
														
@@ -86,6 +90,7 @@
 
															     "30 Days": "30 jours",
														
 
															     "30 days ago": "Il y a 30 jours",
														
 
															     "30d change": "Variation 30 j",
														
 
															+    "5 minutes": "5 minutes",
														
 
															     "5-Hour Window": "Fenêtre de 5 heures",
														
 
															     "50 / page": "50 / page",
														
 
															     "7 Days": "7 jours",
														
@@ -218,6 +223,7 @@
 
															     "Aggregated traffic by upstream model provider": "Trafic agrégé par fournisseur de modèle amont",
														
 
															     "Aggregated usage metrics and trend charts.": "Métriques d'utilisation agrégées et graphiques de tendances.",
														
 
															     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "agrège plus de 50 fournisseurs IA derrière une API unifiée. Gérez l'accès, suivez les coûts et évoluez sans effort.",
														
 
															+    "Aggregation bucket": "Fenêtre d’agrégation",
														
 
															     "AGPL v3.0 License": "Licence AGPL v3.0",
														
 
															     "AI model testing environment": "Environnement de test de modèle IA",
														
 
															     "AI models": "Modèles d'IA",
														
@@ -423,13 +429,18 @@
 
															     "Automatically selects the best available group with circuit breaker mechanism": "Sélectionne automatiquement le meilleur groupe disponible avec un mécanisme de disjoncteur de circuit",
														
 
															     "Automatically sync model list when upstream changes are detected": "Synchroniser automatiquement la liste des modèles lorsque des changements en amont sont détectés",
														
 
															     "Automatically test channels and notify users when limits are hit": "Tester automatiquement les canaux et notifier les utilisateurs lorsque les limites sont atteintes",
														
 
															+    "Availability (last 24h)": "Disponibilité (dernières 24 h)",
														
 
															     "Available": "Disponible",
														
 
															     "Available disk space": "Espace disque disponible",
														
 
															     "Available Models": "Modèles disponibles",
														
 
															     "Available Rewards": "Récompenses disponibles",
														
 
															+    "Average latency": "Latence moyenne",
														
 
															+    "Average latency, TTFT, and success rate by group": "Latence moyenne, TTFT et taux de réussite par groupe",
														
 
															     "Average RPM": "RPM moyen",
														
 
															+    "Average time-to-first-token (TTFT) by group": "Temps moyen jusqu’au premier token (TTFT) par groupe",
														
 
															     "Average tokens per second sustained per group": "Tokens par seconde soutenus en moyenne par groupe",
														
 
															     "Average TPM": "TPM moyen",
														
 
															+    "Average TTFT": "TTFT moyen",
														
 
															     "AWS": "AWS",
														
 
															     "AWS Bedrock Claude Compat": "AWS Bedrock Claude Compat",
														
 
															     "AWS Key Format": "Format de clé AWS",
														
@@ -710,6 +721,7 @@
 
															     "Cohere": "Cohere",
														
 
															     "Collapse": "Réduire",
														
 
															     "Collapse All": "Tout réduire",
														
 
															+    "Collect relay latency and success-rate metrics for the model square.": "Collecte les métriques de latence Relay et de taux de réussite pour la place des modèles.",
														
 
															     "Color": "Couleur",
														
 
															     "Color is required": "La couleur est requise",
														
 
															     "Color preset": "Préréglage de couleur",
														
@@ -1294,6 +1306,7 @@
 
															     "Enable io.net deployments": "Activer les déploiements io.net",
														
 
															     "Enable io.net model deployment service in console": "Activer le service de déploiement de modèles io.net dans la console",
														
 
															     "Enable LinuxDO OAuth": "Activer LinuxDO OAuth",
														
 
															+    "Enable model performance metrics": "Activer les indicateurs de performance des modèles",
														
 
															     "Enable OIDC": "Activer OIDC",
														
 
															     "Enable or disable this channel": "Activer ou désactiver ce canal",
														
 
															     "Enable or disable this model": "Activer ou désactiver ce modèle",
														
@@ -1659,6 +1672,7 @@
 
															     "Fixed price (USD)": "Prix fixe (USD)",
														
 
															     "Floating": "Flottant",
														
 
															     "FluentRead extension not detected. Please ensure it is installed and active.": "Extension FluentRead non détectée. Veuillez vous assurer qu'elle est installée et activée.",
														
 
															+    "Flush interval (minutes)": "Intervalle d’écriture (minutes)",
														
 
															     "Follow the guided steps to prepare your workspace before the first login.": "Suivez les étapes guidées pour préparer votre espace de travail avant la première connexion.",
														
 
															     "Footer": "Pied de page",
														
 
															     "Footer text displayed at the bottom of pages": "Texte de pied de page affiché en bas des pages",
														
@@ -2221,6 +2235,7 @@
 
															     "Model name is required": "Le nom du modèle est requis",
														
 
															     "Model names copied to clipboard": "Noms des modèles copiés dans le presse-papiers",
														
 
															     "Model not found": "Modèle introuvable",
														
 
															+    "Model performance metrics": "Indicateurs de performance des modèles",
														
 
															     "Model Price": "Prix du modèle",
														
 
															     "Model Price Not Configured": "Prix du modèle non configuré",
														
 
															     "Model Pricing": "Tarification des modèles",
														
@@ -2396,6 +2411,7 @@
 
															     "No groups match your search": "Aucun groupe ne correspond à votre recherche",
														
 
															     "No header overrides configured.": "Aucune surcharge d'en-têtes configurée.",
														
 
															     "No history data available": "Aucune donnée historique disponible",
														
 
															+    "No incidents in the last 24 hours": "Aucun incident au cours des 24 dernières heures",
														
 
															     "No incidents in the last 30 days": "Aucun incident sur les 30 derniers jours",
														
 
															     "No Inviter": "Pas d'inviteur",
														
 
															     "No keys found": "Aucune clé trouvée",
														
@@ -3106,6 +3122,8 @@
 
															     "Request Model:": "Modèle demandé :",
														
 
															     "Request overrides, routing behavior, and upstream model automation": "Surcharges de requête, comportement de routage et automatisation des modèles amont",
														
 
															     "Request rule pricing": "Règles de tarification de requête",
														
 
															+    "Request success rate sampled over the last 24 hours": "Taux de réussite des requêtes échantillonné sur les 24 dernières heures",
														
 
															+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Taux de réussite des requêtes ; {{incidents}} créneaux avec incident sur les 24 dernières heures",
														
 
															     "Request timed out, please refresh and restart GitHub login": "Délai dépassé, veuillez actualiser la page puis relancer la connexion GitHub",
														
 
															     "Request-based": "Selon la requête",
														
 
															     "Requests (24h)": "Requêtes (24 h)",
														
@@ -3154,6 +3172,7 @@
 
															     "Restrict user model request frequency (may impact high concurrency performance)": "Restreindre la fréquence des requêtes du modèle utilisateur (peut impacter les performances en cas de forte concurrence)",
														
 
															     "Retain last N days": "Conserver les N derniers jours",
														
 
															     "Retain last N files": "Conserver les N derniers fichiers",
														
 
															+    "Retention days": "Jours de rétention",
														
 
															     "Retry": "Réessayer",
														
 
															     "Retry Chain": "Chaîne de tentatives",
														
 
															     "Retry Suggestion": "Suggestion de relance",
														
@@ -3527,6 +3546,7 @@
 
															     "Subscription Plans": "Plans d'abonnement",
														
 
															     "Subtract": "Soustraire",
														
 
															     "Success": "Succès",
														
 
															+    "Success rate": "Taux de réussite",
														
 
															     "Successfully created {{count}} API Key(s)": "{{count}} clé(s) API créée(s) avec succès",
														
 
															     "Successfully created {{count}} redemption codes": "{{count}} codes de réduction créés avec succès",
														
 
															     "Successfully deleted {{count}} API key(s)": "{{count}} clé(s) API supprimée(s) avec succès",
														
--- a/web/default/src/i18n/locales/ja.json
+++ b/web/default/src/i18n/locales/ja.json
@@ -29,6 +29,7 @@
 
															     "{{count}} disabled channel(s) deleted": "{{count}} 個の無効チャネルを削除しました",
														
 
															     "{{count}} hours ago": "{{count}} 時間前",
														
 
															     "{{count}} incidents": "{{count}} 件のインシデント",
														
 
															+    "{{count}} incidents in the last 24 hours": "過去 24 時間に {{count}} 件のインシデント",
														
 
															     "{{count}} incidents in the last 30 days": "過去 30 日間で {{count}} 件のインシデント",
														
 
															     "{{count}} IP(s)": "{{count}} IP",
														
 
															     "{{count}} log entries removed.": "{{count}} 件のログエントリを削除しました。",
														
@@ -59,11 +60,14 @@
 
															     "© 2025 Your Company. All rights reserved.": "© 2025 Your Company. 全著作権所有。",
														
 
															     "+{{count}} more": "他 {{count}} 件",
														
 
															     "| Based on": "| に基づく",
														
 
															+    "0 means data is kept permanently": "0 はデータを永続的に保持することを意味します",
														
 
															     "0 means unlimited": "0は無制限を意味します",
														
 
															     "1 Day": "1日",
														
 
															     "1 day ago": "1日前",
														
 
															+    "1 hour": "1 時間",
														
 
															     "1 Hour": "1時間",
														
 
															     "1 hour ago": "1時間前",
														
 
															+    "1 minute": "1 分",
														
 
															     "1 minute ago": "1分前",
														
 
															     "1 Month": "1ヶ月",
														
 
															     "1 month ago": "1ヶ月前",
														
@@ -86,6 +90,7 @@
 
															     "30 Days": "30日",
														
 
															     "30 days ago": "30日前",
														
 
															     "30d change": "30日変化",
														
 
															+    "5 minutes": "5 分",
														
 
															     "5-Hour Window": "5時間ウィンドウ",
														
 
															     "50 / page": "50 / ページ",
														
 
															     "7 Days": "7日",
														
@@ -218,6 +223,7 @@
 
															     "Aggregated traffic by upstream model provider": "上流モデルプロバイダー別の集計トラフィック",
														
 
															     "Aggregated usage metrics and trend charts.": "集計された使用量メトリクスとトレンドチャート。",
														
 
															     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "50以上のAIプロバイダーを統一APIで集約。アクセス管理、コスト追跡、スケーリングを簡単に。",
														
 
															+    "Aggregation bucket": "集計バケット",
														
 
															     "AGPL v3.0 License": "AGPL v3.0ライセンス",
														
 
															     "AI model testing environment": "AIモデルテスト環境",
														
 
															     "AI models": "AIモデル",
														
@@ -423,13 +429,18 @@
 
															     "Automatically selects the best available group with circuit breaker mechanism": "回路ブレーカーメカニズム付きで最適な利用可能なグループを自動的に選択",
														
 
															     "Automatically sync model list when upstream changes are detected": "アップストリームの変更が検出されたときにモデルリストを自動的に同期",
														
 
															     "Automatically test channels and notify users when limits are hit": "チャネルを自動的にテストし、制限に達したときにユーザーに通知する",
														
 
															+    "Availability (last 24h)": "可用性（過去 24 時間）",
														
 
															     "Available": "空き",
														
 
															     "Available disk space": "利用可能なディスク容量",
														
 
															     "Available Models": "利用可能なモデル",
														
 
															     "Available Rewards": "利用可能な報酬",
														
 
															+    "Average latency": "平均レイテンシ",
														
 
															+    "Average latency, TTFT, and success rate by group": "グループ別の平均レイテンシ、TTFT、成功率",
														
 
															     "Average RPM": "平均RPM",
														
 
															+    "Average time-to-first-token (TTFT) by group": "グループ別の平均 Time to First Token（TTFT）",
														
 
															     "Average tokens per second sustained per group": "グループごとに持続する平均スループット (tokens/秒)",
														
 
															     "Average TPM": "平均TPM",
														
 
															+    "Average TTFT": "平均 TTFT",
														
 
															     "AWS": "AWS",
														
 
															     "AWS Bedrock Claude Compat": "AWS Bedrock Claude 互換テンプレート",
														
 
															     "AWS Key Format": "AWSキーフォーマット",
														
@@ -710,6 +721,7 @@
 
															     "Cohere": "Cohere",
														
 
															     "Collapse": "折りたたむ",
														
 
															     "Collapse All": "すべて折りたたむ",
														
 
															+    "Collect relay latency and success-rate metrics for the model square.": "モデル広場向けに Relay のレイテンシと成功率メトリクスを収集します。",
														
 
															     "Color": "カラー",
														
 
															     "Color is required": "色は必須です",
														
 
															     "Color preset": "カラープリセット",
														
@@ -1294,6 +1306,7 @@
 
															     "Enable io.net deployments": "io.net デプロイを有効化",
														
 
															     "Enable io.net model deployment service in console": "コンソールで io.net モデルデプロイサービスを有効化",
														
 
															     "Enable LinuxDO OAuth": "LinuxDO OAuthを有効にする",
														
 
															+    "Enable model performance metrics": "モデル性能メトリクスを有効化",
														
 
															     "Enable OIDC": "OIDCを有効にする",
														
 
															     "Enable or disable this channel": "このチャネルを有効または無効にする",
														
 
															     "Enable or disable this model": "このモデルを有効または無効にする",
														
@@ -1659,6 +1672,7 @@
 
															     "Fixed price (USD)": "固定価格 (USD)",
														
 
															     "Floating": "フローティング",
														
 
															     "FluentRead extension not detected. Please ensure it is installed and active.": "FluentRead 拡張機能が検出されませんでした。インストールされていて有効になっていることを確認してください。",
														
 
															+    "Flush interval (minutes)": "書き込み間隔（分）",
														
 
															     "Follow the guided steps to prepare your workspace before the first login.": "初回ログイン前に、ガイド付きの手順に従ってワークスペースを準備してください。",
														
 
															     "Footer": "フッター",
														
 
															     "Footer text displayed at the bottom of pages": "ページ下部に表示されるフッターテキスト",
														
@@ -2221,6 +2235,7 @@
 
															     "Model name is required": "モデル名は必須です",
														
 
															     "Model names copied to clipboard": "モデル名がクリップボードにコピーされました",
														
 
															     "Model not found": "モデルが見つかりません",
														
 
															+    "Model performance metrics": "モデル性能メトリクス",
														
 
															     "Model Price": "モデル価格",
														
 
															     "Model Price Not Configured": "モデル価格が未設定",
														
 
															     "Model Pricing": "モデル料金",
														
@@ -2396,6 +2411,7 @@
 
															     "No groups match your search": "検索に一致するグループがありません",
														
 
															     "No header overrides configured.": "ヘッダーのオーバーライドが設定されていません。",
														
 
															     "No history data available": "履歴データがありません",
														
 
															+    "No incidents in the last 24 hours": "過去 24 時間にインシデントはありません",
														
 
															     "No incidents in the last 30 days": "過去 30 日間でインシデントはありません",
														
 
															     "No Inviter": "招待者なし",
														
 
															     "No keys found": "キーが見つかりません",
														
@@ -3106,6 +3122,8 @@
 
															     "Request Model:": "リクエストモデル：",
														
 
															     "Request overrides, routing behavior, and upstream model automation": "リクエスト上書き、ルーティング動作、上流モデル自動化",
														
 
															     "Request rule pricing": "リクエストルールの課金",
														
 
															+    "Request success rate sampled over the last 24 hours": "過去 24 時間にサンプリングされたリクエスト成功率",
														
 
															+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "リクエスト成功率；過去 24 時間に {{incidents}} 個のインシデント時間枠",
														
 
															     "Request timed out, please refresh and restart GitHub login": "タイムアウトしました。ページをリロードして GitHub ログインをやり直してください",
														
 
															     "Request-based": "リクエスト条件あり",
														
 
															     "Requests (24h)": "リクエスト (24h)",
														
@@ -3154,6 +3172,7 @@
 
															     "Restrict user model request frequency (may impact high concurrency performance)": "ユーザーモデルのリクエスト頻度を制限する（高並行性パフォーマンスに影響を与える可能性があります）",
														
 
															     "Retain last N days": "最新N日間を保持",
														
 
															     "Retain last N files": "最新N個のファイルを保持",
														
 
															+    "Retention days": "保持日数",
														
 
															     "Retry": "再試行",
														
 
															     "Retry Chain": "リトライチェーン",
														
 
															     "Retry Suggestion": "リトライ提案",
														
@@ -3527,6 +3546,7 @@
 
															     "Subscription Plans": "サブスクリプションプラン",
														
 
															     "Subtract": "減算",
														
 
															     "Success": "成功",
														
 
															+    "Success rate": "成功率",
														
 
															     "Successfully created {{count}} API Key(s)": "{{count}}個のAPIキーが正常に作成されました",
														
 
															     "Successfully created {{count}} redemption codes": "{{count}}件の引き換えコードが正常に作成されました",
														
 
															     "Successfully deleted {{count}} API key(s)": "{{count}}個のAPIキーが正常に削除されました",
														
--- a/web/default/src/i18n/locales/ru.json
+++ b/web/default/src/i18n/locales/ru.json
@@ -29,6 +29,7 @@
 
															     "{{count}} disabled channel(s) deleted": "Удалено {{count}} отключённых каналов",
														
 
															     "{{count}} hours ago": "{{count}} часов назад",
														
 
															     "{{count}} incidents": "{{count}} инцидентов",
														
 
															+    "{{count}} incidents in the last 24 hours": "{{count}} инцидентов за последние 24 часа",
														
 
															     "{{count}} incidents in the last 30 days": "{{count}} инцидентов за последние 30 дней",
														
 
															     "{{count}} IP(s)": "{{count}} IP",
														
 
															     "{{count}} log entries removed.": "Удалено {{count}} записей журнала.",
														
@@ -59,11 +60,14 @@
 
															     "© 2025 Your Company. All rights reserved.": "© 2025 Ваша Компания. Все права защищены.",
														
 
															     "+{{count}} more": "ещё {{count}}",
														
 
															     "| Based on": "| На основе",
														
 
															+    "0 means data is kept permanently": "0 означает, что данные хранятся постоянно",
														
 
															     "0 means unlimited": "0 означает без ограничений",
														
 
															     "1 Day": "1 день",
														
 
															     "1 day ago": "1 день назад",
														
 
															+    "1 hour": "1 час",
														
 
															     "1 Hour": "1 ч.",
														
 
															     "1 hour ago": "1 час назад",
														
 
															+    "1 minute": "1 минута",
														
 
															     "1 minute ago": "1 минуту назад",
														
 
															     "1 Month": "1 мес.",
														
 
															     "1 month ago": "1 месяц назад",
														
@@ -86,6 +90,7 @@
 
															     "30 Days": "30 дней",
														
 
															     "30 days ago": "30 дней назад",
														
 
															     "30d change": "Изменение за 30 дней",
														
 
															+    "5 minutes": "5 минут",
														
 
															     "5-Hour Window": "5-часовое окно",
														
 
															     "50 / page": "50 / страница",
														
 
															     "7 Days": "7 дней",
														
@@ -218,6 +223,7 @@
 
															     "Aggregated traffic by upstream model provider": "Агрегированный трафик по поставщикам моделей",
														
 
															     "Aggregated usage metrics and trend charts.": "Агрегированные метрики использования и графики трендов.",
														
 
															     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "объединяет 50+ ИИ-провайдеров за единым API. Управляйте доступом, отслеживайте затраты и масштабируйтесь без усилий.",
														
 
															+    "Aggregation bucket": "Интервал агрегации",
														
 
															     "AGPL v3.0 License": "Лицензия AGPL v3.0",
														
 
															     "AI model testing environment": "Среда тестирования ИИ моделей",
														
 
															     "AI models": "Модели ИИ",
														
@@ -423,13 +429,18 @@
 
															     "Automatically selects the best available group with circuit breaker mechanism": "Автоматически выбирает лучшую доступную группу с механизмом circuit breaker",
														
 
															     "Automatically sync model list when upstream changes are detected": "Автоматически синхронизировать список моделей при обнаружении изменений у провайдера",
														
 
															     "Automatically test channels and notify users when limits are hit": "Автоматически тестировать каналы и уведомлять пользователей при достижении лимитов",
														
 
															+    "Availability (last 24h)": "Доступность (последние 24 ч)",
														
 
															     "Available": "Доступно",
														
 
															     "Available disk space": "Доступное дисковое пространство",
														
 
															     "Available Models": "Доступные модели",
														
 
															     "Available Rewards": "Доступные награды",
														
 
															+    "Average latency": "Средняя задержка",
														
 
															+    "Average latency, TTFT, and success rate by group": "Средняя задержка, TTFT и доля успешных запросов по группам",
														
 
															     "Average RPM": "Средний RPM",
														
 
															+    "Average time-to-first-token (TTFT) by group": "Среднее время до первого токена (TTFT) по группам",
														
 
															     "Average tokens per second sustained per group": "Средняя устойчивая пропускная способность (токенов/с) по группам",
														
 
															     "Average TPM": "Средний TPM",
														
 
															+    "Average TTFT": "Средний TTFT",
														
 
															     "AWS": "AWS",
														
 
															     "AWS Bedrock Claude Compat": "AWS Bedrock Claude совместимость",
														
 
															     "AWS Key Format": "Формат ключа AWS",
														
@@ -710,6 +721,7 @@
 
															     "Cohere": "Cohere",
														
 
															     "Collapse": "Свернуть",
														
 
															     "Collapse All": "Свернуть все",
														
 
															+    "Collect relay latency and success-rate metrics for the model square.": "Собирает метрики задержки Relay и доли успешных запросов для витрины моделей.",
														
 
															     "Color": "Цвет",
														
 
															     "Color is required": "Цвет обязателен",
														
 
															     "Color preset": "Цветовая предустановка",
														
@@ -1294,6 +1306,7 @@
 
															     "Enable io.net deployments": "Включить развертывания io.net",
														
 
															     "Enable io.net model deployment service in console": "Включить сервис развертывания моделей io.net в консоли",
														
 
															     "Enable LinuxDO OAuth": "Включить LinuxDO OAuth",
														
 
															+    "Enable model performance metrics": "Включить метрики производительности моделей",
														
 
															     "Enable OIDC": "Включить OIDC",
														
 
															     "Enable or disable this channel": "Включить или отключить этот канал",
														
 
															     "Enable or disable this model": "Включить или отключить эту модель",
														
@@ -1659,6 +1672,7 @@
 
															     "Fixed price (USD)": "Фиксированная цена (USD)",
														
 
															     "Floating": "Плавающая",
														
 
															     "FluentRead extension not detected. Please ensure it is installed and active.": "Расширение FluentRead не обнаружено. Убедитесь, что оно установлено и активно.",
														
 
															+    "Flush interval (minutes)": "Интервал записи (минуты)",
														
 
															     "Follow the guided steps to prepare your workspace before the first login.": "Следуйте пошаговым инструкциям, чтобы подготовить рабочее пространство перед первым входом.",
														
 
															     "Footer": "Подвал",
														
 
															     "Footer text displayed at the bottom of pages": "Текст нижнего колонтитула, отображаемый внизу страниц",
														
@@ -2221,6 +2235,7 @@
 
															     "Model name is required": "Название модели обязательно",
														
 
															     "Model names copied to clipboard": "Названия моделей скопированы в буфер обмена",
														
 
															     "Model not found": "Модель не найдена",
														
 
															+    "Model performance metrics": "Метрики производительности моделей",
														
 
															     "Model Price": "Цена модели",
														
 
															     "Model Price Not Configured": "Цена модели не настроена",
														
 
															     "Model Pricing": "Цены на модели",
														
@@ -2396,6 +2411,7 @@
 
															     "No groups match your search": "Нет групп, соответствующих вашему поиску",
														
 
															     "No header overrides configured.": "Нет настроенных переопределений заголовков.",
														
 
															     "No history data available": "Исторические данные недоступны",
														
 
															+    "No incidents in the last 24 hours": "За последние 24 часа инцидентов не было",
														
 
															     "No incidents in the last 30 days": "За последние 30 дней инцидентов не было",
														
 
															     "No Inviter": "Нет пригласившего",
														
 
															     "No keys found": "Ключи не найдены",
														
@@ -3106,6 +3122,8 @@
 
															     "Request Model:": "Модель запроса:",
														
 
															     "Request overrides, routing behavior, and upstream model automation": "Переопределения запросов, маршрутизация и автоматизация upstream-моделей",
														
 
															     "Request rule pricing": "Правила ценообразования по запросу",
														
 
															+    "Request success rate sampled over the last 24 hours": "Доля успешных запросов по выборкам за последние 24 часа",
														
 
															+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Доля успешных запросов; {{incidents}} интервалов с инцидентами за последние 24 часа",
														
 
															     "Request timed out, please refresh and restart GitHub login": "Время ожидания истекло, обновите страницу и снова запустите вход через GitHub",
														
 
															     "Request-based": "Зависит от запроса",
														
 
															     "Requests (24h)": "Запросы (24 ч)",
														
@@ -3154,6 +3172,7 @@
 
															     "Restrict user model request frequency (may impact high concurrency performance)": "Ограничить частоту запросов пользовательских моделей (может повлиять на производительность при высокой конкуренции)",
														
 
															     "Retain last N days": "Хранить последние N дней",
														
 
															     "Retain last N files": "Хранить последние N файлов",
														
 
															+    "Retention days": "Дней хранения",
														
 
															     "Retry": "Повторить попытку",
														
 
															     "Retry Chain": "Цепочка повторов",
														
 
															     "Retry Suggestion": "Рекомендация по повтору",
														
@@ -3527,6 +3546,7 @@
 
															     "Subscription Plans": "Планы подписки",
														
 
															     "Subtract": "Вычесть",
														
 
															     "Success": "Успешно",
														
 
															+    "Success rate": "Доля успешных запросов",
														
 
															     "Successfully created {{count}} API Key(s)": "Успешно создано {{count}} API-ключ(а/ей)",
														
 
															     "Successfully created {{count}} redemption codes": "Успешно создано {{count}} кодов активации",
														
 
															     "Successfully deleted {{count}} API key(s)": "Успешно удалено {{count}} API-ключ(а/ей)",
														
--- a/web/default/src/i18n/locales/vi.json
+++ b/web/default/src/i18n/locales/vi.json
@@ -29,6 +29,7 @@
 
															     "{{count}} disabled channel(s) deleted": "Đã xóa {{count}} kênh đã tắt",
														
 
															     "{{count}} hours ago": "{{count}} giờ trước",
														
 
															     "{{count}} incidents": "{{count}} sự cố",
														
 
															+    "{{count}} incidents in the last 24 hours": "{{count}} sự cố trong 24 giờ qua",
														
 
															     "{{count}} incidents in the last 30 days": "{{count}} sự cố trong 30 ngày qua",
														
 
															     "{{count}} IP(s)": "{{count}} IP",
														
 
															     "{{count}} log entries removed.": "Đã xóa {{count}} mục nhật ký.",
														
@@ -59,11 +60,14 @@
 
															     "© 2025 Your Company. All rights reserved.": "© 2025 Công ty của bạn. Mọi quyền được bảo lưu.",
														
 
															     "+{{count}} more": "thêm {{count}} mục",
														
 
															     "| Based on": "| Dựa trên",
														
 
															+    "0 means data is kept permanently": "0 nghĩa là dữ liệu được giữ vĩnh viễn",
														
 
															     "0 means unlimited": "0 có nghĩa là không giới hạn",
														
 
															     "1 Day": "1 ngày",
														
 
															     "1 day ago": "1 ngày trước",
														
 
															+    "1 hour": "1 giờ",
														
 
															     "1 Hour": "1 giờ",
														
 
															     "1 hour ago": "1 giờ trước",
														
 
															+    "1 minute": "1 phút",
														
 
															     "1 minute ago": "1 phút trước",
														
 
															     "1 Month": "1 tháng",
														
 
															     "1 month ago": "1 tháng trước",
														
@@ -86,6 +90,7 @@
 
															     "30 Days": "30 ngày",
														
 
															     "30 days ago": "30 ngày trước",
														
 
															     "30d change": "Thay đổi 30 ngày",
														
 
															+    "5 minutes": "5 phút",
														
 
															     "5-Hour Window": "Cửa sổ 5 giờ",
														
 
															     "50 / page": "50 / trang",
														
 
															     "7 Days": "7 ngày",
														
@@ -218,6 +223,7 @@
 
															     "Aggregated traffic by upstream model provider": "Lưu lượng tổng hợp theo nhà cung cấp mô hình",
														
 
															     "Aggregated usage metrics and trend charts.": "Chỉ số sử dụng tổng hợp và biểu đồ xu hướng.",
														
 
															     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "tổng hợp hơn 50 nhà cung cấp AI sau một API thống nhất. Quản lý truy cập, theo dõi chi phí và mở rộng dễ dàng.",
														
 
															+    "Aggregation bucket": "Khoảng tổng hợp",
														
 
															     "AGPL v3.0 License": "Giấy phép AGPL v3.0",
														
 
															     "AI model testing environment": "Môi trường thử nghiệm mô hình AI",
														
 
															     "AI models": "mô hình AI",
														
@@ -423,13 +429,18 @@
 
															     "Automatically selects the best available group with circuit breaker mechanism": "Tự động chọn nhóm tốt nhất hiện có với cơ chế ngắt mạch",
														
 
															     "Automatically sync model list when upstream changes are detected": "Tự động đồng bộ danh sách mô hình khi phát hiện thay đổi từ nguồn",
														
 
															     "Automatically test channels and notify users when limits are hit": "Tự động kiểm tra các kênh và thông báo cho người dùng khi đạt đến giới hạn",
														
 
															+    "Availability (last 24h)": "Độ khả dụng (24 giờ qua)",
														
 
															     "Available": "Khả dụng",
														
 
															     "Available disk space": "Dung lượng đĩa khả dụng",
														
 
															     "Available Models": "Mô hình khả dụng",
														
 
															     "Available Rewards": "Phần thưởng hiện có",
														
 
															+    "Average latency": "Độ trễ trung bình",
														
 
															+    "Average latency, TTFT, and success rate by group": "Độ trễ trung bình, TTFT và tỷ lệ thành công theo nhóm",
														
 
															     "Average RPM": "RPM trung bình",
														
 
															+    "Average time-to-first-token (TTFT) by group": "Thời gian trung bình tới token đầu tiên (TTFT) theo nhóm",
														
 
															     "Average tokens per second sustained per group": "Số token mỗi giây trung bình duy trì cho từng nhóm",
														
 
															     "Average TPM": "TPM trung bình",
														
 
															+    "Average TTFT": "TTFT trung bình",
														
 
															     "AWS": "AWS",
														
 
															     "AWS Bedrock Claude Compat": "AWS Bedrock Claude tương thích",
														
 
															     "AWS Key Format": "Định dạng khóa AWS",
														
@@ -710,6 +721,7 @@
 
															     "Cohere": "Cohere",
														
 
															     "Collapse": "Thu gọn",
														
 
															     "Collapse All": "Thu gọn tất cả",
														
 
															+    "Collect relay latency and success-rate metrics for the model square.": "Thu thập chỉ số độ trễ Relay và tỷ lệ thành công cho quảng trường mô hình.",
														
 
															     "Color": "Màu",
														
 
															     "Color is required": "Màu sắc là bắt buộc",
														
 
															     "Color preset": "Cài đặt màu sẵn",
														
@@ -1294,6 +1306,7 @@
 
															     "Enable io.net deployments": "Bật triển khai io.net",
														
 
															     "Enable io.net model deployment service in console": "Bật dịch vụ triển khai mô hình io.net trong bảng điều khiển",
														
 
															     "Enable LinuxDO OAuth": "Bật LinuxDO OAuth",
														
 
															+    "Enable model performance metrics": "Bật chỉ số hiệu năng mô hình",
														
 
															     "Enable OIDC": "Bật OIDC",
														
 
															     "Enable or disable this channel": "Bật hoặc tắt kênh này",
														
 
															     "Enable or disable this model": "Bật hoặc tắt mô hình này",
														
@@ -1659,6 +1672,7 @@
 
															     "Fixed price (USD)": "Giá cố định (USD)",
														
 
															     "Floating": "Nổi",
														
 
															     "FluentRead extension not detected. Please ensure it is installed and active.": "Không phát hiện tiện ích mở rộng FluentRead. Vui lòng đảm bảo nó đã được cài đặt và kích hoạt.",
														
 
															+    "Flush interval (minutes)": "Khoảng ghi xuống DB (phút)",
														
 
															     "Follow the guided steps to prepare your workspace before the first login.": "Thực hiện theo các bước hướng dẫn để chuẩn bị không gian làm việc của bạn trước lần đăng nhập đầu tiên.",
														
 
															     "Footer": "Chân trang",
														
 
															     "Footer text displayed at the bottom of pages": "Văn bản chân trang hiển thị ở cuối các trang",
														
@@ -2221,6 +2235,7 @@
 
															     "Model name is required": "Tên mô hình là bắt buộc",
														
 
															     "Model names copied to clipboard": "Tên mô hình đã được sao chép vào bộ nhớ tạm",
														
 
															     "Model not found": "Không tìm thấy mô hình",
														
 
															+    "Model performance metrics": "Chỉ số hiệu năng mô hình",
														
 
															     "Model Price": "Giá mô hình",
														
 
															     "Model Price Not Configured": "Giá mô hình chưa được cấu hình",
														
 
															     "Model Pricing": "Bảng giá mô hình",
														
@@ -2396,6 +2411,7 @@
 
															     "No groups match your search": "Không có nhóm nào khớp với tìm kiếm của bạn",
														
 
															     "No header overrides configured.": "Không có ghi đè tiêu đề nào được cấu hình.",
														
 
															     "No history data available": "Không có dữ liệu lịch sử",
														
 
															+    "No incidents in the last 24 hours": "Không có sự cố trong 24 giờ qua",
														
 
															     "No incidents in the last 30 days": "Không có sự cố trong 30 ngày qua",
														
 
															     "No Inviter": "Không có người mời",
														
 
															     "No keys found": "Không tìm thấy khóa",
														
@@ -3106,6 +3122,8 @@
 
															     "Request Model:": "Mô hình yêu cầu:",
														
 
															     "Request overrides, routing behavior, and upstream model automation": "Ghi đè yêu cầu, hành vi định tuyến và tự động hóa mô hình upstream",
														
 
															     "Request rule pricing": "Quy tắc tính giá theo request",
														
 
															+    "Request success rate sampled over the last 24 hours": "Tỷ lệ yêu cầu thành công được lấy mẫu trong 24 giờ qua",
														
 
															+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Tỷ lệ yêu cầu thành công; {{incidents}} khoảng có sự cố trong 24 giờ qua",
														
 
															     "Request timed out, please refresh and restart GitHub login": "Yêu cầu đã hết thời gian chờ, vui lòng làm mới và đăng nhập lại GitHub",
														
 
															     "Request-based": "Theo yêu cầu",
														
 
															     "Requests (24h)": "Yêu cầu (24h)",
														
@@ -3154,6 +3172,7 @@
 
															     "Restrict user model request frequency (may impact high concurrency performance)": "Hạn chế tần suất yêu cầu mô hình người dùng (có thể ảnh hưởng đến hiệu suất khi có độ đồng thời cao)",
														
 
															     "Retain last N days": "Giữ lại N ngày gần nhất",
														
 
															     "Retain last N files": "Giữ lại N tệp gần nhất",
														
 
															+    "Retention days": "Số ngày lưu giữ",
														
 
															     "Retry": "Thử lại",
														
 
															     "Retry Chain": "Chuỗi thử lại",
														
 
															     "Retry Suggestion": "Gợi ý thử lại",
														
@@ -3527,6 +3546,7 @@
 
															     "Subscription Plans": "Gói đăng ký",
														
 
															     "Subtract": "Trừ",
														
 
															     "Success": "Thành công",
														
 
															+    "Success rate": "Tỷ lệ thành công",
														
 
															     "Successfully created {{count}} API Key(s)": "Đã tạo thành công {{count}} khóa API",
														
 
															     "Successfully created {{count}} redemption codes": "Đã tạo thành công {{count}} mã đổi thưởng",
														
 
															     "Successfully deleted {{count}} API key(s)": "Đã xóa thành công {{count}} khóa API",
														
--- a/web/default/src/i18n/locales/zh.json
+++ b/web/default/src/i18n/locales/zh.json
@@ -29,6 +29,7 @@
 
															     "{{count}} disabled channel(s) deleted": "已删除 {{count}} 个已禁用的渠道",
														
 
															     "{{count}} hours ago": "{{count}} 小时前",
														
 
															     "{{count}} incidents": "{{count}} 起事件",
														
 
															+    "{{count}} incidents in the last 24 hours": "最近 24 小时 {{count}} 个异常桶",
														
 
															     "{{count}} incidents in the last 30 days": "最近 30 天 {{count}} 起事件",
														
 
															     "{{count}} IP(s)": "{{count}} 个 IP",
														
 
															     "{{count}} log entries removed.": "已删除 {{count}} 条日志。",
														
@@ -59,11 +60,14 @@
 
															     "© 2025 Your Company. All rights reserved.": "© 2025 您的公司。保留所有权利。",
														
 
															     "+{{count}} more": "还有 {{count}} 项",
														
 
															     "| Based on": "| 基于",
														
 
															+    "0 means data is kept permanently": "0 表示永久保留数据",
														
 
															     "0 means unlimited": "0 表示不限",
														
 
															     "1 Day": "1 天",
														
 
															     "1 day ago": "1 天前",
														
 
															+    "1 hour": "1 小时",
														
 
															     "1 Hour": "1 小时",
														
 
															     "1 hour ago": "1 小时前",
														
 
															+    "1 minute": "1 分钟",
														
 
															     "1 minute ago": "1 分钟前",
														
 
															     "1 Month": "1 个月",
														
 
															     "1 month ago": "1 个月前",
														
@@ -86,6 +90,7 @@
 
															     "30 Days": "30 天",
														
 
															     "30 days ago": "30 天前",
														
 
															     "30d change": "30 天变化",
														
 
															+    "5 minutes": "5 分钟",
														
 
															     "5-Hour Window": "5小时窗口",
														
 
															     "50 / page": "50 条/页",
														
 
															     "7 Days": "7 天",
														
@@ -218,6 +223,7 @@
 
															     "Aggregated traffic by upstream model provider": "按上游模型提供商聚合的流量",
														
 
															     "Aggregated usage metrics and trend charts.": "聚合使用指标和趋势图表。",
														
 
															     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "聚合 50+ AI 提供商于统一 API 之后。轻松管理访问、追踪成本、弹性扩展。",
														
 
															+    "Aggregation bucket": "聚合时间桶",
														
 
															     "AGPL v3.0 License": "AGPL v3.0 协议",
														
 
															     "AI model testing environment": "AI模型测试环境",
														
 
															     "AI models": "AI 模型",
														
@@ -423,13 +429,18 @@
 
															     "Automatically selects the best available group with circuit breaker mechanism": "自动选择可用分组，失败时触发熔断切换",
														
 
															     "Automatically sync model list when upstream changes are detected": "检测到上游模型变更时自动同步模型列表",
														
 
															     "Automatically test channels and notify users when limits are hit": "自动测试渠道并在达到限制时通知用户",
														
 
															+    "Availability (last 24h)": "可用率（最近 24 小时）",
														
 
															     "Available": "可用",
														
 
															     "Available disk space": "可用磁盘空间",
														
 
															     "Available Models": "可用模型",
														
 
															     "Available Rewards": "可用奖励",
														
 
															+    "Average latency": "平均延迟",
														
 
															+    "Average latency, TTFT, and success rate by group": "各分组的平均延迟、首 Token 延迟和成功率",
														
 
															     "Average RPM": "平均 RPM",
														
 
															+    "Average time-to-first-token (TTFT) by group": "各分组的平均首 Token 延迟（TTFT）",
														
 
															     "Average tokens per second sustained per group": "各分组持续输出的平均每秒 token 数",
														
 
															     "Average TPM": "平均 TPM",
														
 
															+    "Average TTFT": "平均首 Token 延迟",
														
 
															     "AWS": "AWS",
														
 
															     "AWS Bedrock Claude Compat": "AWS Bedrock Claude 兼容模板",
														
 
															     "AWS Key Format": "AWS 密钥格式",
														
@@ -710,6 +721,7 @@
 
															     "Cohere": "Cohere",
														
 
															     "Collapse": "收起",
														
 
															     "Collapse All": "全部收起",
														
 
															+    "Collect relay latency and success-rate metrics for the model square.": "收集 Relay 延迟和成功率指标，用于模型广场展示。",
														
 
															     "Color": "颜色",
														
 
															     "Color is required": "颜色为必填项",
														
 
															     "Color preset": "颜色预设",
														
@@ -1294,6 +1306,7 @@
 
															     "Enable io.net deployments": "启用 io.net 部署",
														
 
															     "Enable io.net model deployment service in console": "在控制台启用 io.net 模型部署服务",
														
 
															     "Enable LinuxDO OAuth": "启用 LinuxDO OAuth",
														
 
															+    "Enable model performance metrics": "启用模型性能指标",
														
 
															     "Enable OIDC": "启用 OIDC",
														
 
															     "Enable or disable this channel": "启用或禁用此渠道",
														
 
															     "Enable or disable this model": "启用或禁用此模型",
														
@@ -1659,6 +1672,7 @@
 
															     "Fixed price (USD)": "固定价格 (USD)",
														
 
															     "Floating": "浮动",
														
 
															     "FluentRead extension not detected. Please ensure it is installed and active.": "未检测到 FluentRead 扩展。请确保已安装并激活。",
														
 
															+    "Flush interval (minutes)": "刷库间隔（分钟）",
														
 
															     "Follow the guided steps to prepare your workspace before the first login.": "请按照引导步骤在首次登录前准备您的工作区。",
														
 
															     "Footer": "页脚",
														
 
															     "Footer text displayed at the bottom of pages": "显示在页面底部的页脚文本",
														
@@ -2221,6 +2235,7 @@
 
															     "Model name is required": "模型名称为必填项",
														
 
															     "Model names copied to clipboard": "模型名称已复制到剪贴板",
														
 
															     "Model not found": "模型未找到",
														
 
															+    "Model performance metrics": "模型性能指标",
														
 
															     "Model Price": "模型价格",
														
 
															     "Model Price Not Configured": "模型价格未配置",
														
 
															     "Model Pricing": "模型定价",
														
@@ -2396,6 +2411,7 @@
 
															     "No groups match your search": "没有组匹配您的搜索",
														
 
															     "No header overrides configured.": "未配置标头覆盖。",
														
 
															     "No history data available": "暂无历史数据",
														
 
															+    "No incidents in the last 24 hours": "最近 24 小时无异常",
														
 
															     "No incidents in the last 30 days": "最近 30 天无事件",
														
 
															     "No Inviter": "无邀请人",
														
 
															     "No keys found": "未找到密钥",
														
@@ -3106,6 +3122,8 @@
 
															     "Request Model:": "请求模型：",
														
 
															     "Request overrides, routing behavior, and upstream model automation": "请求覆盖、路由行为和上游模型自动化",
														
 
															     "Request rule pricing": "请求规则计费",
														
 
															+    "Request success rate sampled over the last 24 hours": "最近 24 小时按时间桶采样的请求成功率",
														
 
															+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "请求成功率；最近 24 小时 {{incidents}} 个异常桶",
														
 
															     "Request timed out, please refresh and restart GitHub login": "请求超时，请刷新页面后重新发起 GitHub 登录",
														
 
															     "Request-based": "含请求条件",
														
 
															     "Requests (24h)": "请求数（24 小时）",
														
@@ -3154,6 +3172,7 @@
 
															     "Restrict user model request frequency (may impact high concurrency performance)": "限制用户模型请求频率（可能会影响高并发性能）",
														
 
															     "Retain last N days": "保留最近N天",
														
 
															     "Retain last N files": "保留最近 N 个文件",
														
 
															+    "Retention days": "保留天数",
														
 
															     "Retry": "重试",
														
 
															     "Retry Chain": "重试链路",
														
 
															     "Retry Suggestion": "重试建议",
														
@@ -3527,6 +3546,7 @@
 
															     "Subscription Plans": "订阅套餐",
														
 
															     "Subtract": "减少",
														
 
															     "Success": "成功",
														
 
															+    "Success rate": "成功率",
														
 
															     "Successfully created {{count}} API Key(s)": "成功创建了 {{count}} 个 API 密钥",
														
 
															     "Successfully created {{count}} redemption codes": "成功创建了 {{count}} 个兑换码",
														
 
															     "Successfully deleted {{count}} API key(s)": "成功删除了 {{count}} 个 API 密钥",