Просмотр исходного кода

feat: collect model performance metrics (#4635)

Calcium-Ion 1 неделя назад
Родитель
Коммит
9acf5fecae

+ 46 - 0
controller/perf_metrics.go

@@ -0,0 +1,46 @@
+package controller
+
+import (
+	"net/http"
+	"strconv"
+
+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
+
+	"github.com/gin-gonic/gin"
+)
+
+// GetPerfMetrics serves aggregated performance metrics for one model.
+//
+// Query parameters:
+//   - model: required; an empty value yields HTTP 400.
+//   - hours: optional look-back window; when absent or not an integer the
+//     value 24 is used.
+//   - group: optional; forwarded unchanged to perfmetrics.Query.
+//
+// A failing perfmetrics.Query yields HTTP 500 carrying the error text.
+func GetPerfMetrics(c *gin.Context) {
+	name := c.Query("model")
+	if name == "" {
+		c.JSON(http.StatusBadRequest, gin.H{
+			"success": false,
+			"message": "model is required",
+		})
+		return
+	}
+
+	// Atoi rejects the empty string, so a missing "hours" keeps the default.
+	window := 24
+	if n, convErr := strconv.Atoi(c.Query("hours")); convErr == nil {
+		window = n
+	}
+
+	params := perfmetrics.QueryParams{
+		Model: name,
+		Group: c.Query("group"),
+		Hours: window,
+	}
+	data, err := perfmetrics.Query(params)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{
+			"success": false,
+			"message": err.Error(),
+		})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{
+		"success": true,
+		"data":    data,
+	})
+}

+ 6 - 0
controller/relay.go

@@ -15,6 +15,7 @@ import (
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/middleware"
 	"github.com/QuantumNous/new-api/middleware"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/model"
+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
 	"github.com/QuantumNous/new-api/relay"
 	"github.com/QuantumNous/new-api/relay"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	relayconstant "github.com/QuantumNous/new-api/relay/constant"
 	relayconstant "github.com/QuantumNous/new-api/relay/constant"
@@ -239,6 +240,11 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
 		retryLogStr := fmt.Sprintf("重试:%s", strings.Trim(strings.Join(strings.Fields(fmt.Sprint(useChannel)), "->"), "[]"))
 		retryLogStr := fmt.Sprintf("重试:%s", strings.Trim(strings.Join(strings.Fields(fmt.Sprint(useChannel)), "->"), "[]"))
 		logger.LogInfo(c, retryLogStr)
 		logger.LogInfo(c, retryLogStr)
 	}
 	}
+	if newAPIError != nil {
+		gopool.Go(func() {
+			perfmetrics.RecordRelaySample(relayInfo, false)
+		})
+	}
 }
 }
 
 
 var upgrader = websocket.Upgrader{
 var upgrader = websocket.Upgrader{

+ 3 - 0
main.go

@@ -19,6 +19,7 @@ import (
 	"github.com/QuantumNous/new-api/middleware"
 	"github.com/QuantumNous/new-api/middleware"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/oauth"
 	"github.com/QuantumNous/new-api/oauth"
+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
 	"github.com/QuantumNous/new-api/relay"
 	"github.com/QuantumNous/new-api/relay"
 	"github.com/QuantumNous/new-api/router"
 	"github.com/QuantumNous/new-api/router"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/service"
@@ -306,6 +307,8 @@ func InitResources() error {
 		return err
 		return err
 	}
 	}
 
 
+	perfmetrics.Init()
+
 	// 启动系统监控
 	// 启动系统监控
 	common.StartSystemMonitor()
 	common.StartSystemMonitor()
 
 

+ 2 - 0
model/main.go

@@ -280,6 +280,7 @@ func migrateDB() error {
 		&SubscriptionPreConsumeRecord{},
 		&SubscriptionPreConsumeRecord{},
 		&CustomOAuthProvider{},
 		&CustomOAuthProvider{},
 		&UserOAuthBinding{},
 		&UserOAuthBinding{},
+		&PerfMetric{},
 	)
 	)
 	if err != nil {
 	if err != nil {
 		return err
 		return err
@@ -328,6 +329,7 @@ func migrateDBFast() error {
 		{&SubscriptionPreConsumeRecord{}, "SubscriptionPreConsumeRecord"},
 		{&SubscriptionPreConsumeRecord{}, "SubscriptionPreConsumeRecord"},
 		{&CustomOAuthProvider{}, "CustomOAuthProvider"},
 		{&CustomOAuthProvider{}, "CustomOAuthProvider"},
 		{&UserOAuthBinding{}, "UserOAuthBinding"},
 		{&UserOAuthBinding{}, "UserOAuthBinding"},
+		{&PerfMetric{}, "PerfMetric"},
 	}
 	}
 	// 动态计算migration数量,确保errChan缓冲区足够大
 	// 动态计算migration数量,确保errChan缓冲区足够大
 	errChan := make(chan error, len(migrations))
 	errChan := make(chan error, len(migrations))

+ 70 - 0
model/perf_metric.go

@@ -0,0 +1,70 @@
+package model
+
+import (
+	"time"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+)
+
+// PerfMetric stores aggregated relay performance metrics for the model square.
+// Each row holds the running totals for one (model, group, time bucket)
+// combination; that triple is covered by the unique index
+// idx_perf_model_group_bucket, and bucket_ts additionally has its own index.
+type PerfMetric struct {
+	Id             int    `json:"id" gorm:"primaryKey"`
+	ModelName      string `json:"model_name" gorm:"size:128;uniqueIndex:idx_perf_model_group_bucket,priority:1"`
+	Group          string `json:"group" gorm:"column:group;size:64;uniqueIndex:idx_perf_model_group_bucket,priority:2"`
+	// BucketTs is the bucket's timestamp; range queries and cleanup compare
+	// against it.
+	BucketTs       int64  `json:"bucket_ts" gorm:"uniqueIndex:idx_perf_model_group_bucket,priority:3;index:idx_perf_bucket_ts"`
+	RequestCount   int64  `json:"request_count" gorm:"default:0"`
+	SuccessCount   int64  `json:"success_count" gorm:"default:0"`
+	// TotalLatencyMs and TtftSumMs are sums, not averages; divide by
+	// RequestCount and TtftCount respectively to obtain a mean.
+	TotalLatencyMs int64  `json:"total_latency_ms" gorm:"default:0"`
+	TtftSumMs      int64  `json:"ttft_sum_ms" gorm:"default:0"`
+	TtftCount      int64  `json:"ttft_count" gorm:"default:0"`
+}
+
+// TableName pins the table name to "perf_metrics".
+func (PerfMetric) TableName() string {
+	return "perf_metrics"
+}
+
+// UpsertPerfMetric adds the counters carried by metric to the stored row with
+// the same (model_name, group, bucket_ts), inserting the row when none exists.
+// A nil metric, or one whose RequestCount is zero, is ignored and nil is
+// returned.
+//
+// Every increment expression qualifies its column with the table name. Inside
+// PostgreSQL's ON CONFLICT DO UPDATE both the existing row and the EXCLUDED
+// row are in scope, so a bare column name on the right-hand side is rejected
+// as an ambiguous reference.
+func UpsertPerfMetric(metric *PerfMetric) error {
+	if metric == nil || metric.RequestCount == 0 {
+		return nil
+	}
+	table := metric.TableName()
+	// increment builds "<table>.<column> + ?" so the existing value is
+	// addressed unambiguously.
+	increment := func(column string, delta int64) clause.Expr {
+		return gorm.Expr(table+"."+column+" + ?", delta)
+	}
+	return DB.Clauses(clause.OnConflict{
+		Columns: []clause.Column{
+			{Name: "model_name"},
+			{Name: "group"},
+			{Name: "bucket_ts"},
+		},
+		DoUpdates: clause.Assignments(map[string]interface{}{
+			"request_count":    increment("request_count", metric.RequestCount),
+			"success_count":    increment("success_count", metric.SuccessCount),
+			"total_latency_ms": increment("total_latency_ms", metric.TotalLatencyMs),
+			"ttft_sum_ms":      increment("ttft_sum_ms", metric.TtftSumMs),
+			"ttft_count":       increment("ttft_count", metric.TtftCount),
+		}),
+	}).Create(metric).Error
+}
+
+// GetPerfMetrics loads the stored buckets of modelName whose bucket_ts lies in
+// the inclusive range [startTs, endTs], ordered by bucket_ts ascending. When
+// group is non-empty the result is additionally restricted to that group.
+func GetPerfMetrics(modelName string, group string, startTs int64, endTs int64) ([]PerfMetric, error) {
+	tx := DB.Model(&PerfMetric{}).
+		Where("model_name = ? AND bucket_ts >= ? AND bucket_ts <= ?", modelName, startTs, endTs)
+	if group != "" {
+		tx = tx.Where(commonGroupCol+" = ?", group)
+	}
+
+	var rows []PerfMetric
+	found := tx.Order("bucket_ts ASC").Find(&rows)
+	return rows, found.Error
+}
+
+// DeletePerfMetricsBefore removes every stored bucket whose bucket_ts is
+// strictly below cutoffTs. A non-positive cutoff is a no-op returning nil.
+func DeletePerfMetricsBefore(cutoffTs int64) error {
+	if cutoffTs > 0 {
+		return DB.Where("bucket_ts < ?", cutoffTs).Delete(&PerfMetric{}).Error
+	}
+	return nil
+}
+
+// PerfMetricStartTime returns the Unix timestamp (seconds) lying hours before
+// the current time. A non-positive hours value is treated as 24.
+func PerfMetricStartTime(hours int) int64 {
+	window := 24 * time.Hour
+	if hours > 0 {
+		window = time.Duration(hours) * time.Hour
+	}
+	return time.Now().Add(-window).Unix()
+}

+ 94 - 0
pkg/perf_metrics/flush.go

@@ -0,0 +1,94 @@
+package perfmetrics
+
+import (
+	"fmt"
+	"strconv"
+	"time"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/model"
+	"github.com/QuantumNous/new-api/setting/perf_metrics_setting"
+)
+
+// flushLoop runs forever: it sleeps for the configured flush interval, then,
+// if metrics collection is enabled, persists completed buckets and prunes
+// expired rows. The interval and the settings are re-read on every iteration.
+func flushLoop() {
+	for {
+		wait := time.Duration(perf_metrics_setting.GetFlushIntervalMinutes()) * time.Minute
+		time.Sleep(wait)
+
+		if cfg := perf_metrics_setting.GetSetting(); cfg.Enabled {
+			flushCompletedBuckets()
+			cleanupExpiredMetrics(cfg.RetentionDays)
+		}
+	}
+}
+
+// flushCompletedBuckets persists every in-memory bucket that started before
+// the current bucket. Each such bucket is drained and, when it recorded any
+// requests, upserted into the database. If the upsert fails the drained
+// counters are added back to the bucket and the error is logged, leaving the
+// bucket in place for a later attempt.
+func flushCompletedBuckets() {
+	activeTs := bucketStart(time.Now().Unix())
+	hotBuckets.Range(func(rawKey, rawValue any) bool {
+		bk := rawKey.(bucketKey)
+		if bk.bucketTs >= activeTs {
+			// Still receiving samples; leave it alone.
+			return true
+		}
+
+		slot := rawValue.(*atomicBucket)
+		pending := slot.drain()
+		if pending.requestCount != 0 {
+			row := model.PerfMetric{
+				ModelName:      bk.model,
+				Group:          bk.group,
+				BucketTs:       bk.bucketTs,
+				RequestCount:   pending.requestCount,
+				SuccessCount:   pending.successCount,
+				TotalLatencyMs: pending.totalLatencyMs,
+				TtftSumMs:      pending.ttftSumMs,
+				TtftCount:      pending.ttftCount,
+			}
+			if err := model.UpsertPerfMetric(&row); err != nil {
+				// Restore what was drained so the data is not lost.
+				slot.addCounters(pending)
+				common.SysError(fmt.Sprintf("failed to flush perf metric bucket model=%s group=%s bucket=%d: %s", bk.model, bk.group, bk.bucketTs, err.Error()))
+				return true
+			}
+		}
+
+		deleteOldEmptyBucket(bk, rawKey)
+		return true
+	})
+}
+
+// deleteOldEmptyBucket drops rawKey from hotBuckets when the bucket k started
+// before the bucket that contained the instant 24 hours ago; newer buckets are
+// kept in the map.
+func deleteOldEmptyBucket(k bucketKey, rawKey any) {
+	dayAgo := time.Now().Add(-24 * time.Hour).Unix()
+	if k.bucketTs >= bucketStart(dayAgo) {
+		return
+	}
+	hotBuckets.Delete(rawKey)
+}
+
+// cleanupExpiredMetrics deletes stored buckets older than retentionDays days.
+// A non-positive retentionDays disables cleanup. Deletion errors are logged,
+// not returned.
+func cleanupExpiredMetrics(retentionDays int) {
+	if retentionDays > 0 {
+		maxAge := time.Duration(retentionDays) * 24 * time.Hour
+		err := model.DeletePerfMetricsBefore(time.Now().Add(-maxAge).Unix())
+		if err != nil {
+			common.SysError("failed to cleanup expired perf metrics: " + err.Error())
+		}
+	}
+}
+
+func redisCounters(values map[string]string) counters {
+	return counters{
+		requestCount:   parseRedisInt(values["req"]),
+		successCount:   parseRedisInt(values["ok"]),
+		totalLatencyMs: parseRedisInt(values["lat"]),
+		ttftSumMs:      parseRedisInt(values["ttft"]),
+		ttftCount:      parseRedisInt(values["ttft_n"]),
+	}
+}
+
+// parseRedisInt parses a base-10 int64 taken from a Redis hash field.
+// It returns 0 for an empty string and for any value that fails to parse.
+//
+// The error must be checked rather than discarded: on a range error
+// strconv.ParseInt returns the clamped extreme (MaxInt64 or MinInt64), which
+// would otherwise be merged into the counters as if it were real data.
+func parseRedisInt(value string) int64 {
+	if value == "" {
+		return 0
+	}
+	parsed, err := strconv.ParseInt(value, 10, 64)
+	if err != nil {
+		return 0
+	}
+	return parsed
+}

+ 261 - 0
pkg/perf_metrics/metrics.go

@@ -0,0 +1,261 @@
+package perfmetrics
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"sync"
+	"time"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/model"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/setting/perf_metrics_setting"
+)
+
+// hotBuckets holds the in-memory buckets, keyed by bucketKey with
+// *atomicBucket values.
+var hotBuckets sync.Map
+
+// seriesSchema is returned verbatim as QueryResult.SeriesSchema.
+// NOTE(review): presumably an opaque version tag for the series format —
+// confirm with the frontend consumer.
+const seriesSchema = "dbcd0a3c01b55203"
+
+// Init starts the background flush loop in its own goroutine. Each call
+// starts another loop.
+func Init() {
+	go flushLoop()
+}
+
+// RecordRelaySample derives a Sample from a relay request and records it.
+// Latency is measured from info.StartTime to the moment of the call. A
+// time-to-first-token value is attached only for streaming requests that have
+// sent a response. A nil info is ignored.
+func RecordRelaySample(info *relaycommon.RelayInfo, success bool) {
+	if info == nil {
+		return
+	}
+	finishedAt := time.Now()
+
+	sample := Sample{
+		Model:     info.OriginModelName,
+		Group:     info.UsingGroup,
+		LatencyMs: finishedAt.Sub(info.StartTime).Milliseconds(),
+		Success:   success,
+	}
+	if info.IsStream && info.HasSendResponse() {
+		sample.HasTtft = true
+		sample.TtftMs = info.FirstResponseTime.Sub(info.StartTime).Milliseconds()
+	}
+	Record(sample)
+}
+
+// Record adds sample to the in-memory bucket for the current time slot and
+// mirrors it to Redis. Nothing is recorded when collection is disabled or the
+// sample has no model name. An empty group becomes "default" and a negative
+// latency is clamped to zero.
+func Record(sample Sample) {
+	if cfg := perf_metrics_setting.GetSetting(); !cfg.Enabled {
+		return
+	}
+	if sample.Model == "" {
+		return
+	}
+
+	if sample.Group == "" {
+		sample.Group = "default"
+	}
+	if sample.LatencyMs < 0 {
+		sample.LatencyMs = 0
+	}
+
+	slotKey := bucketKey{
+		model:    sample.Model,
+		group:    sample.Group,
+		bucketTs: bucketStart(time.Now().Unix()),
+	}
+	slot, _ := hotBuckets.LoadOrStore(slotKey, &atomicBucket{})
+	slot.(*atomicBucket).add(sample)
+	recordRedis(slotKey, sample)
+}
+
+func Query(params QueryParams) (QueryResult, error) {
+	if params.Hours <= 0 {
+		params.Hours = 24
+	}
+	if params.Hours > 24*30 {
+		params.Hours = 24 * 30
+	}
+	endTs := time.Now().Unix()
+	startTs := endTs - int64(params.Hours)*3600
+
+	merged := map[bucketKey]counters{}
+	rows, err := model.GetPerfMetrics(params.Model, params.Group, startTs, endTs)
+	if err != nil {
+		return QueryResult{}, err
+	}
+	for _, row := range rows {
+		mergeCounters(merged, bucketKey{
+			model:    row.ModelName,
+			group:    row.Group,
+			bucketTs: row.BucketTs,
+		}, counters{
+			requestCount:   row.RequestCount,
+			successCount:   row.SuccessCount,
+			totalLatencyMs: row.TotalLatencyMs,
+			ttftSumMs:      row.TtftSumMs,
+			ttftCount:      row.TtftCount,
+		})
+	}
+
+	hotBuckets.Range(func(key, value any) bool {
+		k := key.(bucketKey)
+		if k.model != params.Model || k.bucketTs < startTs || k.bucketTs > endTs {
+			return true
+		}
+		if params.Group != "" && k.group != params.Group {
+			return true
+		}
+		mergeCounters(merged, k, value.(*atomicBucket).snapshot())
+		return true
+	})
+
+	return buildQueryResult(params.Model, merged), nil
+}
+
+// bucketStart rounds ts down to a multiple of the configured bucket width in
+// seconds. A non-positive configured width falls back to 3600.
+func bucketStart(ts int64) int64 {
+	width := perf_metrics_setting.GetBucketSeconds()
+	if width <= 0 {
+		width = 3600
+	}
+	// Truncating division followed by multiplication equals ts - ts%width.
+	return ts / width * width
+}
+
+// mergeCounters adds value to the entry stored under key in merged, creating
+// the entry when absent. A value with no requests is ignored.
+func mergeCounters(merged map[bucketKey]counters, key bucketKey, value counters) {
+	if value.requestCount == 0 {
+		return
+	}
+	acc := merged[key]
+	merged[key] = counters{
+		requestCount:   acc.requestCount + value.requestCount,
+		successCount:   acc.successCount + value.successCount,
+		totalLatencyMs: acc.totalLatencyMs + value.totalLatencyMs,
+		ttftSumMs:      acc.ttftSumMs + value.ttftSumMs,
+		ttftCount:      acc.ttftCount + value.ttftCount,
+	}
+}
+
+func buildQueryResult(modelName string, merged map[bucketKey]counters) QueryResult {
+	groupBuckets := map[string]map[int64]counters{}
+	for key, value := range merged {
+		if value.requestCount == 0 {
+			continue
+		}
+		if _, ok := groupBuckets[key.group]; !ok {
+			groupBuckets[key.group] = map[int64]counters{}
+		}
+		groupBuckets[key.group][key.bucketTs] = value
+	}
+
+	groups := make([]string, 0, len(groupBuckets))
+	for group := range groupBuckets {
+		groups = append(groups, group)
+	}
+	sort.Strings(groups)
+
+	results := make([]GroupResult, 0, len(groups))
+	for _, group := range groups {
+		buckets := groupBuckets[group]
+		timestamps := make([]int64, 0, len(buckets))
+		for ts := range buckets {
+			timestamps = append(timestamps, ts)
+		}
+		sort.Slice(timestamps, func(i, j int) bool {
+			return timestamps[i] < timestamps[j]
+		})
+
+		total := counters{}
+		series := make([]BucketPoint, 0, len(timestamps))
+		for _, ts := range timestamps {
+			value := buckets[ts]
+			total.requestCount += value.requestCount
+			total.successCount += value.successCount
+			total.totalLatencyMs += value.totalLatencyMs
+			total.ttftSumMs += value.ttftSumMs
+			total.ttftCount += value.ttftCount
+			series = append(series, bucketPoint(ts, value))
+		}
+
+		results = append(results, GroupResult{
+			Group:        group,
+			AvgTtftMs:    avg(total.ttftSumMs, total.ttftCount),
+			AvgLatencyMs: avg(total.totalLatencyMs, total.requestCount),
+			SuccessRate:  successRate(total),
+			RequestCount: total.requestCount,
+			SuccessCount: total.successCount,
+			TtftCount:    total.ttftCount,
+			Series:       series,
+		})
+	}
+
+	return QueryResult{
+		ModelName:    modelName,
+		SeriesSchema: seriesSchema,
+		Groups:       results,
+	}
+}
+
+func bucketPoint(ts int64, value counters) BucketPoint {
+	return BucketPoint{
+		Ts:           ts,
+		AvgTtftMs:    avg(value.ttftSumMs, value.ttftCount),
+		AvgLatencyMs: avg(value.totalLatencyMs, value.requestCount),
+		SuccessRate:  successRate(value),
+		Count:        value.requestCount,
+		SuccessCount: value.successCount,
+		TtftCount:    value.ttftCount,
+	}
+}
+
+// avg returns sum divided by count using integer division, or 0 when count is
+// not positive.
+func avg(sum int64, count int64) int64 {
+	if count > 0 {
+		return sum / count
+	}
+	return 0
+}
+
+// successRate returns the share of successful requests as a percentage
+// (successCount / requestCount * 100), or 0 when there are no requests.
+func successRate(value counters) float64 {
+	if value.requestCount > 0 {
+		return float64(value.successCount) / float64(value.requestCount) * 100
+	}
+	return 0
+}
+
+// recordRedis mirrors one sample into the Redis hash of its bucket using a
+// transactional pipeline with a one-second timeout, and sets the hash to
+// expire after one hour. It does nothing when Redis is disabled or there is
+// no client. The pipeline result is discarded.
+func recordRedis(key bucketKey, sample Sample) {
+	if !(common.RedisEnabled && common.RDB != nil) {
+		return
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
+	defer cancel()
+
+	hash := redisBucketKey(key)
+	tx := common.RDB.TxPipeline()
+	tx.HIncrBy(ctx, hash, "req", 1)
+	if sample.Success {
+		tx.HIncrBy(ctx, hash, "ok", 1)
+	}
+	if sample.LatencyMs > 0 {
+		tx.HIncrBy(ctx, hash, "lat", sample.LatencyMs)
+	}
+	if sample.HasTtft && sample.TtftMs >= 0 {
+		tx.HIncrBy(ctx, hash, "ttft", sample.TtftMs)
+		tx.HIncrBy(ctx, hash, "ttft_n", 1)
+	}
+	tx.Expire(ctx, hash, time.Hour)
+	_, _ = tx.Exec(ctx)
+}
+
+// mergeRedisActiveBuckets merges the Redis counters of the currently active
+// bucket for (params.Model, params.Group) into merged, provided that bucket
+// lies within [startTs, endTs]. It is a no-op when Redis is unavailable, when
+// model or group is empty, or when the hash cannot be read or is empty.
+func mergeRedisActiveBuckets(merged map[bucketKey]counters, params QueryParams, startTs int64, endTs int64) {
+	if !common.RedisEnabled || common.RDB == nil {
+		return
+	}
+	if params.Model == "" || params.Group == "" {
+		return
+	}
+
+	current := bucketStart(time.Now().Unix())
+	if current < startTs || current > endTs {
+		return
+	}
+
+	bk := bucketKey{model: params.Model, group: params.Group, bucketTs: current}
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
+	defer cancel()
+
+	fields, err := common.RDB.HGetAll(ctx, redisBucketKey(bk)).Result()
+	if err == nil && len(fields) > 0 {
+		mergeCounters(merged, bk, redisCounters(fields))
+	}
+}
+
+// redisBucketKey returns the Redis key for a bucket in the form
+// "perf:<model>:<group>:<bucketTs>".
+func redisBucketKey(key bucketKey) string {
+	return fmt.Sprintf("perf:%s:%s:%d", key.model, key.group, key.bucketTs)
+}

+ 124 - 0
pkg/perf_metrics/types.go

@@ -0,0 +1,124 @@
+package perfmetrics
+
+import "sync/atomic"
+
+// Store describes the two operations of a metrics backend: recording a sample
+// and querying aggregated results.
+type Store interface {
+	Record(sample Sample)
+	Query(params QueryParams) (QueryResult, error)
+}
+
+// Sample is a single observed request.
+type Sample struct {
+	Model     string
+	Group     string
+	LatencyMs int64
+	// TtftMs is only counted when HasTtft is true.
+	TtftMs    int64
+	HasTtft   bool
+	Success   bool
+}
+
+// QueryParams selects the model, optional group and look-back window (in
+// hours) of a query.
+type QueryParams struct {
+	Model string
+	Group string
+	Hours int
+}
+
+// BucketPoint is one time bucket of a group's series as returned to clients.
+type BucketPoint struct {
+	Ts           int64   `json:"ts"`
+	AvgTtftMs    int64   `json:"avg_ttft_ms"`
+	AvgLatencyMs int64   `json:"avg_latency_ms"`
+	// SuccessRate is a percentage (0-100).
+	SuccessRate  float64 `json:"success_rate"`
+	Count        int64   `json:"count"`
+	SuccessCount int64   `json:"success_count"`
+	TtftCount    int64   `json:"ttft_count"`
+}
+
+// GroupResult holds the totals of one group over the queried window together
+// with its per-bucket series.
+type GroupResult struct {
+	Group        string        `json:"group"`
+	AvgTtftMs    int64         `json:"avg_ttft_ms"`
+	AvgLatencyMs int64         `json:"avg_latency_ms"`
+	// SuccessRate is a percentage (0-100).
+	SuccessRate  float64       `json:"success_rate"`
+	RequestCount int64         `json:"request_count"`
+	SuccessCount int64         `json:"success_count"`
+	TtftCount    int64         `json:"ttft_count"`
+	Series       []BucketPoint `json:"series"`
+}
+
+// QueryResult is the response payload of a metrics query for one model.
+type QueryResult struct {
+	ModelName    string        `json:"model_name"`
+	SeriesSchema string        `json:"series_schema"`
+	Groups       []GroupResult `json:"groups"`
+}
+
+// bucketKey identifies one bucket; it is used as a map key.
+type bucketKey struct {
+	model    string
+	group    string
+	bucketTs int64
+}
+
+// counters is a plain (non-atomic) set of bucket totals.
+type counters struct {
+	requestCount   int64
+	successCount   int64
+	totalLatencyMs int64
+	ttftSumMs      int64
+	ttftCount      int64
+}
+
+// atomicBucket holds the totals of one bucket as independently updated atomic
+// counters. Each field is atomic on its own; the set of fields is not read or
+// written as a single atomic unit.
+type atomicBucket struct {
+	requestCount   atomic.Int64
+	successCount   atomic.Int64
+	totalLatencyMs atomic.Int64
+	ttftSumMs      atomic.Int64
+	ttftCount      atomic.Int64
+}
+
+// add counts one sample. Latency is added only when positive; the TTFT pair is
+// updated only when the sample carries a non-negative TTFT.
+func (b *atomicBucket) add(sample Sample) {
+	b.requestCount.Add(1)
+	if sample.Success {
+		b.successCount.Add(1)
+	}
+	if sample.LatencyMs > 0 {
+		b.totalLatencyMs.Add(sample.LatencyMs)
+	}
+	if !sample.HasTtft || sample.TtftMs < 0 {
+		return
+	}
+	b.ttftSumMs.Add(sample.TtftMs)
+	b.ttftCount.Add(1)
+}
+
+// snapshot reads the current values without modifying them.
+func (b *atomicBucket) snapshot() counters {
+	var c counters
+	c.requestCount = b.requestCount.Load()
+	c.successCount = b.successCount.Load()
+	c.totalLatencyMs = b.totalLatencyMs.Load()
+	c.ttftSumMs = b.ttftSumMs.Load()
+	c.ttftCount = b.ttftCount.Load()
+	return c
+}
+
+// drain resets every counter to zero and returns the values they held.
+func (b *atomicBucket) drain() counters {
+	var c counters
+	c.requestCount = b.requestCount.Swap(0)
+	c.successCount = b.successCount.Swap(0)
+	c.totalLatencyMs = b.totalLatencyMs.Swap(0)
+	c.ttftSumMs = b.ttftSumMs.Swap(0)
+	c.ttftCount = b.ttftCount.Swap(0)
+	return c
+}
+
+// addCounters adds previously drained totals back onto the bucket, skipping
+// fields whose delta is zero.
+func (b *atomicBucket) addCounters(c counters) {
+	bump := func(dst *atomic.Int64, delta int64) {
+		if delta != 0 {
+			dst.Add(delta)
+		}
+	}
+	bump(&b.requestCount, c.requestCount)
+	bump(&b.successCount, c.successCount)
+	bump(&b.totalLatencyMs, c.totalLatencyMs)
+	bump(&b.ttftSumMs, c.ttftSumMs)
+	bump(&b.ttftCount, c.ttftCount)
+}

+ 1 - 0
router/api-router.go

@@ -31,6 +31,7 @@ func SetApiRouter(router *gin.Engine) {
 		//apiRouter.GET("/midjourney", controller.GetMidjourney)
 		//apiRouter.GET("/midjourney", controller.GetMidjourney)
 		apiRouter.GET("/home_page_content", controller.GetHomePageContent)
 		apiRouter.GET("/home_page_content", controller.GetHomePageContent)
 		apiRouter.GET("/pricing", middleware.TryUserAuth(), controller.GetPricing)
 		apiRouter.GET("/pricing", middleware.TryUserAuth(), controller.GetPricing)
+		apiRouter.GET("/perf-metrics", middleware.TryUserAuth(), controller.GetPerfMetrics)
 		apiRouter.GET("/verification", middleware.EmailVerificationRateLimit(), middleware.TurnstileCheck(), controller.SendEmailVerification)
 		apiRouter.GET("/verification", middleware.EmailVerificationRateLimit(), middleware.TurnstileCheck(), controller.SendEmailVerification)
 		apiRouter.GET("/reset_password", middleware.CriticalRateLimit(), middleware.TurnstileCheck(), controller.SendPasswordResetEmail)
 		apiRouter.GET("/reset_password", middleware.CriticalRateLimit(), middleware.TurnstileCheck(), controller.SendPasswordResetEmail)
 		apiRouter.POST("/user/reset", middleware.CriticalRateLimit(), controller.ResetPassword)
 		apiRouter.POST("/user/reset", middleware.CriticalRateLimit(), controller.ResetPassword)

+ 6 - 2
service/quota.go

@@ -14,6 +14,7 @@ import (
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/pkg/billingexpr"
 	"github.com/QuantumNous/new-api/pkg/billingexpr"
+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/setting/ratio_setting"
 	"github.com/QuantumNous/new-api/setting/ratio_setting"
 	"github.com/QuantumNous/new-api/setting/system_setting"
 	"github.com/QuantumNous/new-api/setting/system_setting"
@@ -219,7 +220,7 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
 		// in this case, must be some error happened
 		// in this case, must be some error happened
 		// we cannot just return, because we may have to return the pre-consumed quota
 		// we cannot just return, because we may have to return the pre-consumed quota
 		quota = 0
 		quota = 0
-		logContent += fmt.Sprintf("(可能是上游超时)")
+		logContent += "(可能是上游超时)"
 		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
 		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
 			"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
 			"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
 	} else {
 	} else {
@@ -340,7 +341,7 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, u
 		// in this case, must be some error happened
 		// in this case, must be some error happened
 		// we cannot just return, because we may have to return the pre-consumed quota
 		// we cannot just return, because we may have to return the pre-consumed quota
 		quota = 0
 		quota = 0
-		logContent += fmt.Sprintf("(可能是上游超时)")
+		logContent += "(可能是上游超时)"
 		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
 		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
 			"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.OriginModelName, relayInfo.FinalPreConsumedQuota))
 			"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.OriginModelName, relayInfo.FinalPreConsumedQuota))
 	} else {
 	} else {
@@ -375,6 +376,9 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, u
 		Group:            relayInfo.UsingGroup,
 		Group:            relayInfo.UsingGroup,
 		Other:            other,
 		Other:            other,
 	})
 	})
+	gopool.Go(func() {
+		perfmetrics.RecordRelaySample(relayInfo, true)
+	})
 }
 }
 
 
 func PreConsumeTokenQuota(relayInfo *relaycommon.RelayInfo, quota int) error {
 func PreConsumeTokenQuota(relayInfo *relaycommon.RelayInfo, quota int) error {

+ 5 - 0
service/text_quota.go

@@ -11,10 +11,12 @@ import (
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/pkg/billingexpr"
 	"github.com/QuantumNous/new-api/pkg/billingexpr"
+	perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/setting/operation_setting"
 	"github.com/QuantumNous/new-api/setting/operation_setting"
 	"github.com/QuantumNous/new-api/types"
 	"github.com/QuantumNous/new-api/types"
 
 
+	"github.com/bytedance/gopkg/util/gopool"
 	"github.com/gin-gonic/gin"
 	"github.com/gin-gonic/gin"
 	"github.com/shopspring/decimal"
 	"github.com/shopspring/decimal"
 )
 )
@@ -471,4 +473,7 @@ func PostTextConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, us
 		Group:            relayInfo.UsingGroup,
 		Group:            relayInfo.UsingGroup,
 		Other:            other,
 		Other:            other,
 	})
 	})
+	gopool.Go(func() {
+		perfmetrics.RecordRelaySample(relayInfo, true)
+	})
 }
 }

+ 45 - 0
setting/perf_metrics_setting/config.go

@@ -0,0 +1,45 @@
+package perf_metrics_setting
+
+import "github.com/QuantumNous/new-api/setting/config"
+
+// PerfMetricsSetting is the configuration of performance-metrics collection.
+type PerfMetricsSetting struct {
+	Enabled       bool   `json:"enabled"`
+	// FlushInterval is expressed in minutes; see GetFlushIntervalMinutes.
+	FlushInterval int    `json:"flush_interval"`
+	// BucketTime is one of "minute", "5min" or "hour"; see GetBucketSeconds.
+	BucketTime    string `json:"bucket_time"`
+	RetentionDays int    `json:"retention_days"`
+}
+
+// perfMetricsSetting holds the active configuration, initialised with the
+// defaults below and registered with the global config in init.
+var perfMetricsSetting = PerfMetricsSetting{
+	Enabled:       true,
+	FlushInterval: 5,
+	BucketTime:    "hour",
+	RetentionDays: 0,
+}
+
+// init registers the settings under the key "perf_metrics_setting" with the
+// global configuration.
+func init() {
+	config.GlobalConfig.Register("perf_metrics_setting", &perfMetricsSetting)
+}
+
+// GetSetting returns a copy of the current settings.
+func GetSetting() PerfMetricsSetting {
+	return perfMetricsSetting
+}
+
+func GetBucketSeconds() int64 {
+	switch perfMetricsSetting.BucketTime {
+	case "minute":
+		return 60
+	case "5min":
+		return 300
+	case "hour":
+		return 3600
+	default:
+		return 3600
+	}
+}
+
+// GetFlushIntervalMinutes returns the configured flush interval in minutes,
+// never less than 1.
+func GetFlushIntervalMinutes() int {
+	if minutes := perfMetricsSetting.FlushInterval; minutes >= 1 {
+		return minutes
+	}
+	return 1
+}

+ 43 - 0
web/default/src/features/pricing/api.ts

@@ -10,3 +10,46 @@ export async function getPricing(): Promise<PricingData> {
   const res = await api.get('/api/pricing')
   const res = await api.get('/api/pricing')
   return res.data
   return res.data
 }
 }
+
+/** One time bucket of a group's series, as returned by `/api/perf-metrics`. */
+export type PerformanceSeriesPoint = {
+  /** Bucket timestamp in Unix seconds. */
+  ts: number
+  avg_ttft_ms: number
+  avg_latency_ms: number
+  /** Percentage in the range 0-100. */
+  success_rate: number
+  count: number
+  success_count: number
+  ttft_count: number
+}
+
+/** Totals for one group over the queried window plus its per-bucket series. */
+export type PerformanceGroup = {
+  group: string
+  avg_ttft_ms: number
+  avg_latency_ms: number
+  /** Percentage in the range 0-100. */
+  success_rate: number
+  request_count: number
+  success_count: number
+  ttft_count: number
+  series: PerformanceSeriesPoint[]
+}
+
+/** Response envelope of `/api/perf-metrics`. */
+export type PerformanceMetricsData = {
+  success: boolean
+  message?: string
+  data: {
+    model_name: string
+    series_schema?: string
+    groups: PerformanceGroup[]
+  }
+}
+
+/**
+ * Fetches performance metrics for a model from `/api/perf-metrics`.
+ *
+ * @param modelName - Model to query.
+ * @param hours - Look-back window in hours (defaults to 24).
+ * @returns The response body of the request.
+ */
+export async function getPerfMetrics(
+  modelName: string,
+  hours = 24
+): Promise<PerformanceMetricsData> {
+  const query = new URLSearchParams()
+  query.set('model', modelName)
+  query.set('hours', String(hours))
+  const { data } = await api.get(`/api/perf-metrics?${query.toString()}`)
+  return data
+}

+ 7 - 0
web/default/src/features/pricing/components/model-details-charts.tsx

@@ -14,6 +14,13 @@ function formatHourLabel(iso: string): string {
 
 
 function formatDayLabel(date: string): string {
 function formatDayLabel(date: string): string {
   const parsed = new Date(date)
   const parsed = new Date(date)
+  if (date.includes('T')) {
+    return parsed.toLocaleString(undefined, {
+      month: 'short',
+      day: 'numeric',
+      hour: '2-digit',
+    })
+  }
   return parsed.toLocaleDateString(undefined, {
   return parsed.toLocaleDateString(undefined, {
     month: 'short',
     month: 'short',
     day: 'numeric',
     day: 'numeric',

+ 133 - 110
web/default/src/features/pricing/components/model-details-performance.tsx

@@ -1,8 +1,8 @@
 import { useMemo } from 'react'
 import { useMemo } from 'react'
+import { useQuery } from '@tanstack/react-query'
 import {
 import {
   Activity,
   Activity,
   AlertTriangle,
   AlertTriangle,
-  Gauge,
   HeartPulse,
   HeartPulse,
   Timer,
   Timer,
   TrendingUp,
   TrendingUp,
@@ -18,22 +18,14 @@ import {
   TableRow,
   TableRow,
 } from '@/components/ui/table'
 } from '@/components/ui/table'
 import { GroupBadge } from '@/components/group-badge'
 import { GroupBadge } from '@/components/group-badge'
+import { getPerfMetrics, type PerformanceGroup } from '../api'
 import {
 import {
-  aggregateUptime,
-  buildGroupPerformance,
-  buildLatencyTimeSeries,
-  buildUptimeSeries,
   formatLatency,
   formatLatency,
-  formatThroughput,
   formatUptimePct,
   formatUptimePct,
   type UptimeDayPoint,
   type UptimeDayPoint,
 } from '../lib/mock-stats'
 } from '../lib/mock-stats'
 import type { PricingModel } from '../types'
 import type { PricingModel } from '../types'
-import {
-  LatencyTrendChart,
-  ThroughputBarChart,
-  UptimeBarChart,
-} from './model-details-charts'
+import { LatencyTrendChart, UptimeBarChart } from './model-details-charts'
 import { UptimeSparkline } from './model-details-uptime-sparkline'
 import { UptimeSparkline } from './model-details-uptime-sparkline'
 
 
 const COMPACT_NUMBER = new Intl.NumberFormat(undefined, {
 const COMPACT_NUMBER = new Intl.NumberFormat(undefined, {
@@ -74,33 +66,102 @@ function StatCard(props: {
   )
   )
 }
 }
 
 
+/** Per-group summary row: the subset of `PerformanceGroup` used for display. */
+type PerformanceRow = {
+  group: string
+  avg_ttft_ms: number
+  avg_latency_ms: number
+  /** Percentage in the range 0-100. */
+  success_rate: number
+  request_count: number
+}
+
+/**
+ * Flattens every group's series into TTFT points for the latency chart.
+ * Buckets without TTFT samples, or with a non-positive average, are skipped.
+ */
+function toLatencySeries(groups: PerformanceGroup[]) {
+  const points: { timestamp: string; group: string; ttft_ms: number }[] = []
+  for (const entry of groups) {
+    for (const bucket of entry.series) {
+      if (!(bucket.ttft_count > 0 && bucket.avg_ttft_ms > 0)) continue
+      points.push({
+        timestamp: new Date(bucket.ts * 1000).toISOString(),
+        group: entry.group,
+        ttft_ms: bucket.avg_ttft_ms,
+      })
+    }
+  }
+  return points
+}
+
+/**
+ * Combines all groups into one uptime series: request and success counts are
+ * summed per bucket timestamp, and each bucket's uptime is the resulting
+ * success percentage rounded to two decimals. A bucket with at least one
+ * failed request counts as one incident. Points are ordered by timestamp.
+ */
+function toUptimeSeries(groups: PerformanceGroup[]): UptimeDayPoint[] {
+  const totals = new Map<number, { count: number; success: number }>()
+  for (const entry of groups) {
+    for (const bucket of entry.series) {
+      let sum = totals.get(bucket.ts)
+      if (sum === undefined) {
+        sum = { count: 0, success: 0 }
+        totals.set(bucket.ts, sum)
+      }
+      sum.count += bucket.count
+      sum.success += bucket.success_count
+    }
+  }
+
+  const ordered = Array.from(totals.entries()).sort(
+    (left, right) => left[0] - right[0]
+  )
+  const points: UptimeDayPoint[] = []
+  for (const [ts, sum] of ordered) {
+    const pct = sum.count > 0 ? (sum.success / sum.count) * 100 : 0
+    points.push({
+      date: new Date(ts * 1000).toISOString(),
+      uptime_pct: Math.round(pct * 100) / 100,
+      incidents: sum.success < sum.count ? 1 : 0,
+      outage_minutes: 0,
+    })
+  }
+  return points
+}
+
+/**
+ * Converts one group's series into uptime points, using each bucket's success
+ * rate (rounded to two decimals) as its uptime percentage.
+ */
+function toGroupUptimeSeries(group: PerformanceGroup): UptimeDayPoint[] {
+  const points: UptimeDayPoint[] = []
+  for (const bucket of group.series) {
+    points.push({
+      date: new Date(bucket.ts * 1000).toISOString(),
+      uptime_pct: Math.round(bucket.success_rate * 100) / 100,
+      incidents: bucket.success_count < bucket.count ? 1 : 0,
+      outage_minutes: 0,
+    })
+  }
+  return points
+}
+
+/**
+ * Returns the average of `field` across rows, weighted by each row's
+ * `request_count` and rounded to the nearest integer. Rows with a
+ * non-positive value or request count are ignored; the result is 0 when no
+ * row qualifies.
+ */
+function weightedAverage(
+  rows: PerformanceRow[],
+  field: 'avg_ttft_ms' | 'avg_latency_ms'
+): number {
+  const acc = rows.reduce(
+    (sums, row) => {
+      const value = row[field]
+      if (!(value <= 0 || row.request_count <= 0)) {
+        sums.total += value * row.request_count
+        sums.weight += row.request_count
+      }
+      return sums
+    },
+    { total: 0, weight: 0 }
+  )
+  return acc.weight > 0 ? Math.round(acc.total / acc.weight) : 0
+}
+
 export function ModelDetailsPerformance(props: { model: PricingModel }) {
 export function ModelDetailsPerformance(props: { model: PricingModel }) {
   const { t } = useTranslation()
   const { t } = useTranslation()
-  const performances = useMemo(
-    () => buildGroupPerformance(props.model),
-    [props.model]
-  )
-  const latencySeries = useMemo(
-    () => buildLatencyTimeSeries(props.model),
-    [props.model]
-  )
-  const uptimeSeries = useMemo(
-    () => buildUptimeSeries(props.model),
-    [props.model]
-  )
-  const aggregated = useMemo(
-    () => aggregateUptime(uptimeSeries),
-    [uptimeSeries]
+  const metricsQuery = useQuery({
+    queryKey: ['perf-metrics', props.model.model_name],
+    queryFn: () => getPerfMetrics(props.model.model_name, 24),
+    staleTime: 60 * 1000,
+  })
+  const groups = metricsQuery.data?.data.groups ?? []
+  const performances = useMemo<PerformanceRow[]>(
+    () =>
+      groups.map((group) => ({
+        group: group.group,
+        avg_ttft_ms: group.avg_ttft_ms,
+        avg_latency_ms: group.avg_latency_ms,
+        success_rate: group.success_rate,
+        request_count: group.request_count,
+      })),
+    [groups]
   )
   )
+  const latencySeries = useMemo(() => toLatencySeries(groups), [groups])
+  const uptimeSeries = useMemo(() => toUptimeSeries(groups), [groups])
   const uptimeByGroup = useMemo<Record<string, UptimeDayPoint[]>>(() => {
   const uptimeByGroup = useMemo<Record<string, UptimeDayPoint[]>>(() => {
     const map: Record<string, UptimeDayPoint[]> = {}
     const map: Record<string, UptimeDayPoint[]> = {}
-    for (const perf of performances) {
-      map[perf.group] = buildUptimeSeries(props.model, perf.group)
+    for (const group of groups) {
+      map[group.group] = toGroupUptimeSeries(group)
     }
     }
     return map
     return map
-  }, [performances, props.model])
+  }, [groups])
 
 
-  if (performances.length === 0) {
+  if (metricsQuery.isLoading || performances.length === 0) {
     return (
     return (
       <div className='text-muted-foreground rounded-lg border p-6 text-center text-sm'>
       <div className='text-muted-foreground rounded-lg border p-6 text-center text-sm'>
         {t('Performance data is not yet available for this model.')}
         {t('Performance data is not yet available for this model.')}
@@ -108,18 +169,22 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
     )
     )
   }
   }
 
 
-  const bestTtft = Math.min(...performances.map((p) => p.ttft_p50_ms))
-  const bestThroughput = Math.max(...performances.map((p) => p.throughput_tps))
-  const totalRequests = performances.reduce(
-    (s, p) => s + p.request_volume_24h,
-    0
-  )
-  const intent =
-    aggregated.uptime_pct >= 99.9
-      ? 'success'
-      : aggregated.uptime_pct >= 99
-        ? 'default'
-        : 'warning'
+  const ttftValues = performances
+    .map((p) => p.avg_ttft_ms)
+    .filter((value) => value > 0)
+  const bestTtft = ttftValues.length > 0 ? Math.min(...ttftValues) : 0
+  const avgLatency = weightedAverage(performances, 'avg_latency_ms')
+  const totalRequests = performances.reduce((s, p) => s + p.request_count, 0)
+  const totalSuccess = groups.reduce((s, p) => s + p.success_count, 0)
+  const successRate =
+    totalRequests > 0 ? (totalSuccess / totalRequests) * 100 : 0
+  const incidentCount = uptimeSeries.reduce((s, p) => s + p.incidents, 0)
+  let intent: 'default' | 'warning' | 'success' = 'warning'
+  if (successRate >= 99.9) {
+    intent = 'success'
+  } else if (successRate >= 99) {
+    intent = 'default'
+  }
 
 
   const headerCellClass =
   const headerCellClass =
     'text-muted-foreground py-2 text-[10px] font-medium tracking-wider uppercase'
     'text-muted-foreground py-2 text-[10px] font-medium tracking-wider uppercase'
@@ -134,21 +199,21 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
           hint={t('Lowest median first-token latency')}
           hint={t('Lowest median first-token latency')}
         />
         />
         <StatCard
         <StatCard
-          icon={Gauge}
-          label={t('Peak throughput')}
-          value={formatThroughput(bestThroughput)}
+          icon={Timer}
+          label={t('Average latency')}
+          value={formatLatency(avgLatency)}
           hint={t('Across all groups')}
           hint={t('Across all groups')}
         />
         />
         <StatCard
         <StatCard
           icon={HeartPulse}
           icon={HeartPulse}
-          label={t('Uptime (30d)')}
-          value={formatUptimePct(aggregated.uptime_pct)}
+          label={t('Success rate')}
+          value={formatUptimePct(successRate)}
           hint={
           hint={
-            aggregated.incidents > 0
-              ? t('{{count}} incidents in the last 30 days', {
-                  count: aggregated.incidents,
+            incidentCount > 0
+              ? t('{{count}} incidents in the last 24 hours', {
+                  count: incidentCount,
                 })
                 })
-              : t('No incidents in the last 30 days')
+              : t('No incidents in the last 24 hours')
           }
           }
           intent={intent}
           intent={intent}
         />
         />
@@ -164,9 +229,7 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
         <SectionHeader
         <SectionHeader
           icon={Activity}
           icon={Activity}
           title={t('Per-group performance')}
           title={t('Per-group performance')}
-          description={t(
-            'TTFT percentiles, throughput, and 30-day uptime by group'
-          )}
+          description={t('Average latency, TTFT, and success rate by group')}
         />
         />
         <div className='overflow-x-auto rounded-lg border'>
         <div className='overflow-x-auto rounded-lg border'>
           <Table className='text-sm'>
           <Table className='text-sm'>
@@ -174,31 +237,24 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
               <TableRow className='hover:bg-transparent'>
               <TableRow className='hover:bg-transparent'>
                 <TableHead className={headerCellClass}>{t('Group')}</TableHead>
                 <TableHead className={headerCellClass}>{t('Group')}</TableHead>
                 <TableHead className={`${headerCellClass} text-right`}>
                 <TableHead className={`${headerCellClass} text-right`}>
-                  {t('TTFT P50')}
-                </TableHead>
-                <TableHead className={`${headerCellClass} text-right`}>
-                  {t('TTFT P95')}
-                </TableHead>
-                <TableHead className={`${headerCellClass} text-right`}>
-                  {t('TTFT P99')}
+                  {t('Average TTFT')}
                 </TableHead>
                 </TableHead>
                 <TableHead className={`${headerCellClass} text-right`}>
                 <TableHead className={`${headerCellClass} text-right`}>
-                  {t('Throughput')}
+                  {t('Average latency')}
                 </TableHead>
                 </TableHead>
                 <TableHead
                 <TableHead
                   className={`${headerCellClass} min-w-[160px] text-left`}
                   className={`${headerCellClass} min-w-[160px] text-left`}
                 >
                 >
-                  {t('Uptime (30d)')}
+                  {t('Success rate')}
                 </TableHead>
                 </TableHead>
                 <TableHead className={`${headerCellClass} text-right`}>
                 <TableHead className={`${headerCellClass} text-right`}>
-                  {t('Requests / 24h')}
+                  {t('Request Count')}
                 </TableHead>
                 </TableHead>
               </TableRow>
               </TableRow>
             </TableHeader>
             </TableHeader>
             <TableBody>
             <TableBody>
               {performances.map((perf) => {
               {performances.map((perf) => {
-                const isBestTtft = perf.ttft_p50_ms === bestTtft
-                const isBestTput = perf.throughput_tps === bestThroughput
+                const isBestTtft = perf.avg_ttft_ms === bestTtft
                 return (
                 return (
                   <TableRow key={perf.group}>
                   <TableRow key={perf.group}>
                     <TableCell className='py-2.5'>
                     <TableCell className='py-2.5'>
@@ -210,23 +266,10 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
                         isBestTtft && 'text-emerald-600 dark:text-emerald-400'
                         isBestTtft && 'text-emerald-600 dark:text-emerald-400'
                       )}
                       )}
                     >
                     >
-                      {formatLatency(perf.ttft_p50_ms)}
-                    </TableCell>
-                    <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
-                      {formatLatency(perf.ttft_p95_ms)}
+                      {formatLatency(perf.avg_ttft_ms)}
                     </TableCell>
                     </TableCell>
                     <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
                     <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
-                      {formatLatency(perf.ttft_p99_ms)}
-                    </TableCell>
-                    <TableCell
-                      className={cn(
-                        'py-2.5 text-right font-mono',
-                        isBestTput &&
-                          perf.throughput_tps > 0 &&
-                          'text-emerald-600 dark:text-emerald-400'
-                      )}
-                    >
-                      {formatThroughput(perf.throughput_tps)}
+                      {formatLatency(perf.avg_latency_ms)}
                     </TableCell>
                     </TableCell>
                     <TableCell className='py-2.5'>
                     <TableCell className='py-2.5'>
                       <UptimeSparkline
                       <UptimeSparkline
@@ -235,7 +278,7 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
                       />
                       />
                     </TableCell>
                     </TableCell>
                     <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
                     <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
-                      {COMPACT_NUMBER.format(perf.request_volume_24h)}
+                      {COMPACT_NUMBER.format(perf.request_count)}
                     </TableCell>
                     </TableCell>
                   </TableRow>
                   </TableRow>
                 )
                 )
@@ -249,45 +292,31 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
         <SectionHeader
         <SectionHeader
           icon={Timer}
           icon={Timer}
           title={t('Latency trend (last 24h)')}
           title={t('Latency trend (last 24h)')}
-          description={t(
-            'Median time-to-first-token (TTFT) sampled hourly per group'
-          )}
+          description={t('Average time-to-first-token (TTFT) by group')}
         />
         />
         <LatencyTrendChart series={latencySeries} />
         <LatencyTrendChart series={latencySeries} />
       </section>
       </section>
 
 
-      {bestThroughput > 0 && (
-        <section>
-          <SectionHeader
-            icon={Gauge}
-            title={t('Throughput by group')}
-            description={t('Average tokens per second sustained per group')}
-          />
-          <ThroughputBarChart rows={performances} />
-        </section>
-      )}
-
       <section>
       <section>
         <SectionHeader
         <SectionHeader
           icon={HeartPulse}
           icon={HeartPulse}
-          title={t('Uptime (last 30 days)')}
+          title={t('Availability (last 24h)')}
           description={
           description={
-            aggregated.incidents > 0
+            incidentCount > 0
               ? t(
               ? t(
-                  'Daily uptime; {{incidents}} incidents totalling {{minutes}} minutes',
+                  'Request success rate; {{incidents}} incident buckets in the last 24 hours',
                   {
                   {
-                    incidents: aggregated.incidents,
-                    minutes: aggregated.outage_minutes,
+                    incidents: incidentCount,
                   }
                   }
                 )
                 )
-              : t('Daily uptime over the last 30 days')
+              : t('Request success rate sampled over the last 24 hours')
           }
           }
           accent={
           accent={
-            aggregated.incidents > 0 ? (
+            incidentCount > 0 ? (
               <span className='inline-flex items-center gap-1 text-amber-600 dark:text-amber-400'>
               <span className='inline-flex items-center gap-1 text-amber-600 dark:text-amber-400'>
                 <AlertTriangle className='size-3.5' />
                 <AlertTriangle className='size-3.5' />
                 {t('{{count}} incidents', {
                 {t('{{count}} incidents', {
-                  count: aggregated.incidents,
+                  count: incidentCount,
                 })}
                 })}
               </span>
               </span>
             ) : null
             ) : null
@@ -295,12 +324,6 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
         />
         />
         <UptimeBarChart series={uptimeSeries} />
         <UptimeBarChart series={uptimeSeries} />
       </section>
       </section>
-
-      <p className='text-muted-foreground/60 text-[11px] leading-relaxed'>
-        {t(
-          'Performance metrics shown here are simulated for preview purposes and will be replaced with live observability data once the backend integration is complete.'
-        )}
-      </p>
     </div>
     </div>
   )
   )
 }
 }

+ 0 - 8
web/default/src/features/pricing/components/model-details.tsx

@@ -41,7 +41,6 @@ import {
   isDynamicPricingModel,
   isDynamicPricingModel,
 } from '../lib/dynamic-price'
 } from '../lib/dynamic-price'
 import { parseTags } from '../lib/filters'
 import { parseTags } from '../lib/filters'
-import { buildUptimeSeries } from '../lib/mock-stats'
 import {
 import {
   getAvailableGroups,
   getAvailableGroups,
   isTokenBasedModel,
   isTokenBasedModel,
@@ -57,7 +56,6 @@ import { ModelDetailsCapabilities } from './model-details-capabilities'
 import { ModalitiesMatrix } from './model-details-modalities'
 import { ModalitiesMatrix } from './model-details-modalities'
 import { ModelDetailsPerformance } from './model-details-performance'
 import { ModelDetailsPerformance } from './model-details-performance'
 import { ModelDetailsQuickStats } from './model-details-quick-stats'
 import { ModelDetailsQuickStats } from './model-details-quick-stats'
-import { UptimeStatusRow } from './model-details-uptime-sparkline'
 
 
 // ----------------------------------------------------------------------------
 // ----------------------------------------------------------------------------
 // Local UI helpers
 // Local UI helpers
@@ -782,10 +780,6 @@ export function ModelDetailsContent(props: ModelDetailsContentProps) {
   const { t } = useTranslation()
   const { t } = useTranslation()
   const showRechargePrice = props.showRechargePrice ?? false
   const showRechargePrice = props.showRechargePrice ?? false
   const metadata = useMemo(() => inferModelMetadata(props.model), [props.model])
   const metadata = useMemo(() => inferModelMetadata(props.model), [props.model])
-  const uptimeSeries = useMemo(
-    () => buildUptimeSeries(props.model),
-    [props.model]
-  )
 
 
   const isDynamic =
   const isDynamic =
     props.model.billing_mode === 'tiered_expr' &&
     props.model.billing_mode === 'tiered_expr' &&
@@ -797,8 +791,6 @@ export function ModelDetailsContent(props: ModelDetailsContentProps) {
 
 
       <ModelDetailsQuickStats metadata={metadata} />
       <ModelDetailsQuickStats metadata={metadata} />
 
 
-      <UptimeStatusRow series={uptimeSeries} />
-
       <Tabs defaultValue='overview' className='gap-4'>
       <Tabs defaultValue='overview' className='gap-4'>
         <TabsList className='bg-muted/60 h-auto w-full justify-start gap-1 overflow-x-auto rounded-lg p-1'>
         <TabsList className='bg-muted/60 h-auto w-full justify-start gap-1 overflow-x-auto rounded-lg p-1'>
           {TAB_VALUES.map((value) => {
           {TAB_VALUES.map((value) => {

+ 4 - 0
web/default/src/features/system-settings/maintenance/config.ts

@@ -75,6 +75,10 @@ export const DEFAULT_MAINTENANCE_SETTINGS: MaintenanceSettings = {
   'performance_setting.monitor_cpu_threshold': 90,
   'performance_setting.monitor_cpu_threshold': 90,
   'performance_setting.monitor_memory_threshold': 90,
   'performance_setting.monitor_memory_threshold': 90,
   'performance_setting.monitor_disk_threshold': 95,
   'performance_setting.monitor_disk_threshold': 95,
+  'perf_metrics_setting.enabled': true,
+  'perf_metrics_setting.flush_interval': 5,
+  'perf_metrics_setting.bucket_time': 'hour',
+  'perf_metrics_setting.retention_days': 0,
 }
 }
 
 
 const toBoolean = (value: unknown, fallback: boolean): boolean => {
 const toBoolean = (value: unknown, fallback: boolean): boolean => {

+ 96 - 0
web/default/src/features/system-settings/maintenance/performance-section.tsx

@@ -59,6 +59,10 @@ const perfSchema = z.object({
     .number()
     .number()
     .min(0)
     .min(0)
     .max(100),
     .max(100),
+  'perf_metrics_setting.enabled': z.boolean(),
+  'perf_metrics_setting.flush_interval': z.coerce.number().min(1),
+  'perf_metrics_setting.bucket_time': z.enum(['minute', '5min', 'hour']),
+  'perf_metrics_setting.retention_days': z.coerce.number().min(0),
 })
 })
 
 
 type PerfFormValues = z.infer<typeof perfSchema>
 type PerfFormValues = z.infer<typeof perfSchema>
@@ -248,6 +252,7 @@ export function PerformanceSection(props: Props) {
 
 
   const diskEnabled = form.watch('performance_setting.disk_cache_enabled')
   const diskEnabled = form.watch('performance_setting.disk_cache_enabled')
   const monitorEnabled = form.watch('performance_setting.monitor_enabled')
   const monitorEnabled = form.watch('performance_setting.monitor_enabled')
+  const perfMetricsEnabled = form.watch('perf_metrics_setting.enabled')
   const maxCacheSizeMb = form.watch(
   const maxCacheSizeMb = form.watch(
     'performance_setting.disk_cache_max_size_mb'
     'performance_setting.disk_cache_max_size_mb'
   )
   )
@@ -452,6 +457,97 @@ export function PerformanceSection(props: Props) {
             />
             />
           </div>
           </div>
 
 
+          <Separator />
+
+          <div>
+            <h4 className='font-medium'>{t('Model performance metrics')}</h4>
+            <p className='text-muted-foreground mt-1 text-xs'>
+              {t(
+                'Collect relay latency and success-rate metrics for the model square.'
+              )}
+            </p>
+          </div>
+
+          <div className='grid grid-cols-1 gap-4 md:grid-cols-4'>
+            <FormField
+              control={form.control}
+              name='perf_metrics_setting.enabled'
+              render={({ field }) => (
+                <FormItem className='flex items-center gap-2'>
+                  <FormControl>
+                    <Switch
+                      checked={field.value}
+                      onCheckedChange={field.onChange}
+                    />
+                  </FormControl>
+                  <FormLabel>{t('Enable model performance metrics')}</FormLabel>
+                </FormItem>
+              )}
+            />
+            <FormField
+              control={form.control}
+              name='perf_metrics_setting.flush_interval'
+              render={({ field }) => (
+                <FormItem>
+                  <FormLabel>{t('Flush interval (minutes)')}</FormLabel>
+                  <FormControl>
+                    <Input
+                      type='number'
+                      min={1}
+                      {...field}
+                      disabled={!perfMetricsEnabled}
+                    />
+                  </FormControl>
+                </FormItem>
+              )}
+            />
+            <FormField
+              control={form.control}
+              name='perf_metrics_setting.bucket_time'
+              render={({ field }) => (
+                <FormItem>
+                  <FormLabel>{t('Aggregation bucket')}</FormLabel>
+                  <Select
+                    value={field.value}
+                    onValueChange={field.onChange}
+                    disabled={!perfMetricsEnabled}
+                  >
+                    <FormControl>
+                      <SelectTrigger>
+                        <SelectValue />
+                      </SelectTrigger>
+                    </FormControl>
+                    <SelectContent>
+                      <SelectItem value='minute'>{t('1 minute')}</SelectItem>
+                      <SelectItem value='5min'>{t('5 minutes')}</SelectItem>
+                      <SelectItem value='hour'>{t('1 hour')}</SelectItem>
+                    </SelectContent>
+                  </Select>
+                </FormItem>
+              )}
+            />
+            <FormField
+              control={form.control}
+              name='perf_metrics_setting.retention_days'
+              render={({ field }) => (
+                <FormItem>
+                  <FormLabel>{t('Retention days')}</FormLabel>
+                  <FormControl>
+                    <Input
+                      type='number'
+                      min={0}
+                      {...field}
+                      disabled={!perfMetricsEnabled}
+                    />
+                  </FormControl>
+                  <FormDescription>
+                    {t('0 means data is kept permanently')}
+                  </FormDescription>
+                </FormItem>
+              )}
+            />
+          </div>
+
           <Button type='submit' disabled={updateOption.isPending}>
           <Button type='submit' disabled={updateOption.isPending}>
             {updateOption.isPending ? t('Saving...') : t('Save Changes')}
             {updateOption.isPending ? t('Saving...') : t('Save Changes')}
           </Button>
           </Button>

+ 8 - 0
web/default/src/features/system-settings/maintenance/section-registry.tsx

@@ -102,6 +102,14 @@ const MAINTENANCE_SECTIONS = [
             settings['performance_setting.monitor_memory_threshold'] ?? 90,
             settings['performance_setting.monitor_memory_threshold'] ?? 90,
           'performance_setting.monitor_disk_threshold':
           'performance_setting.monitor_disk_threshold':
             settings['performance_setting.monitor_disk_threshold'] ?? 95,
             settings['performance_setting.monitor_disk_threshold'] ?? 95,
+          'perf_metrics_setting.enabled':
+            settings['perf_metrics_setting.enabled'] ?? true,
+          'perf_metrics_setting.flush_interval':
+            settings['perf_metrics_setting.flush_interval'] ?? 5,
+          'perf_metrics_setting.bucket_time':
+            settings['perf_metrics_setting.bucket_time'] ?? 'hour',
+          'perf_metrics_setting.retention_days':
+            settings['perf_metrics_setting.retention_days'] ?? 0,
         }}
         }}
       />
       />
     ),
     ),

+ 4 - 0
web/default/src/features/system-settings/types.ts

@@ -254,6 +254,10 @@ export type MaintenanceSettings = {
   'performance_setting.monitor_cpu_threshold': number
   'performance_setting.monitor_cpu_threshold': number
   'performance_setting.monitor_memory_threshold': number
   'performance_setting.monitor_memory_threshold': number
   'performance_setting.monitor_disk_threshold': number
   'performance_setting.monitor_disk_threshold': number
+  'perf_metrics_setting.enabled': boolean
+  'perf_metrics_setting.flush_interval': number
+  'perf_metrics_setting.bucket_time': 'hour' | 'minute' | '5min'
+  'perf_metrics_setting.retention_days': number
 }
 }
 
 
 export type RequestLimitsSettings = {
 export type RequestLimitsSettings = {

+ 20 - 0
web/default/src/i18n/locales/en.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "{{count}} disabled channel(s) deleted",
     "{{count}} disabled channel(s) deleted": "{{count}} disabled channel(s) deleted",
     "{{count}} hours ago": "{{count}} hours ago",
     "{{count}} hours ago": "{{count}} hours ago",
     "{{count}} incidents": "{{count}} incidents",
     "{{count}} incidents": "{{count}} incidents",
+    "{{count}} incidents in the last 24 hours": "{{count}} incidents in the last 24 hours",
     "{{count}} incidents in the last 30 days": "{{count}} incidents in the last 30 days",
     "{{count}} incidents in the last 30 days": "{{count}} incidents in the last 30 days",
     "{{count}} IP(s)": "{{count}} IP(s)",
     "{{count}} IP(s)": "{{count}} IP(s)",
     "{{count}} log entries removed.": "{{count}} log entries removed.",
     "{{count}} log entries removed.": "{{count}} log entries removed.",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 Your Company. All rights reserved.",
     "© 2025 Your Company. All rights reserved.": "© 2025 Your Company. All rights reserved.",
     "+{{count}} more": "+{{count}} more",
     "+{{count}} more": "+{{count}} more",
     "| Based on": "| Based on",
     "| Based on": "| Based on",
+    "0 means data is kept permanently": "0 means data is kept permanently",
     "0 means unlimited": "0 means unlimited",
     "0 means unlimited": "0 means unlimited",
     "1 Day": "1 Day",
     "1 Day": "1 Day",
     "1 day ago": "1 day ago",
     "1 day ago": "1 day ago",
+    "1 hour": "1 hour",
     "1 Hour": "1H",
     "1 Hour": "1H",
     "1 hour ago": "1 hour ago",
     "1 hour ago": "1 hour ago",
+    "1 minute": "1 minute",
     "1 minute ago": "1 minute ago",
     "1 minute ago": "1 minute ago",
     "1 Month": "1M",
     "1 Month": "1M",
     "1 month ago": "1 month ago",
     "1 month ago": "1 month ago",
@@ -86,6 +90,7 @@
     "30 Days": "30 Days",
     "30 Days": "30 Days",
     "30 days ago": "30 days ago",
     "30 days ago": "30 days ago",
     "30d change": "30d change",
     "30d change": "30d change",
+    "5 minutes": "5 minutes",
     "5-Hour Window": "5-Hour Window",
     "5-Hour Window": "5-Hour Window",
     "50 / page": "50 / page",
     "50 / page": "50 / page",
     "7 Days": "7 Days",
     "7 Days": "7 Days",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "Aggregated traffic by upstream model provider",
     "Aggregated traffic by upstream model provider": "Aggregated traffic by upstream model provider",
     "Aggregated usage metrics and trend charts.": "Aggregated usage metrics and trend charts.",
     "Aggregated usage metrics and trend charts.": "Aggregated usage metrics and trend charts.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.",
+    "Aggregation bucket": "Aggregation bucket",
     "AGPL v3.0 License": "AGPL v3.0 License",
     "AGPL v3.0 License": "AGPL v3.0 License",
     "AI model testing environment": "AI model testing environment",
     "AI model testing environment": "AI model testing environment",
     "AI models": "AI models",
     "AI models": "AI models",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "Automatically selects the best available group with circuit breaker mechanism",
     "Automatically selects the best available group with circuit breaker mechanism": "Automatically selects the best available group with circuit breaker mechanism",
     "Automatically sync model list when upstream changes are detected": "Automatically sync model list when upstream changes are detected",
     "Automatically sync model list when upstream changes are detected": "Automatically sync model list when upstream changes are detected",
     "Automatically test channels and notify users when limits are hit": "Automatically test channels and notify users when limits are hit",
     "Automatically test channels and notify users when limits are hit": "Automatically test channels and notify users when limits are hit",
+    "Availability (last 24h)": "Availability (last 24h)",
     "Available": "Available",
     "Available": "Available",
     "Available disk space": "Available disk space",
     "Available disk space": "Available disk space",
     "Available Models": "Available Models",
     "Available Models": "Available Models",
     "Available Rewards": "Available Rewards",
     "Available Rewards": "Available Rewards",
+    "Average latency": "Average latency",
+    "Average latency, TTFT, and success rate by group": "Average latency, TTFT, and success rate by group",
     "Average RPM": "Average RPM",
     "Average RPM": "Average RPM",
+    "Average time-to-first-token (TTFT) by group": "Average time-to-first-token (TTFT) by group",
     "Average tokens per second sustained per group": "Average tokens per second sustained per group",
     "Average tokens per second sustained per group": "Average tokens per second sustained per group",
     "Average TPM": "Average TPM",
     "Average TPM": "Average TPM",
+    "Average TTFT": "Average TTFT",
     "AWS": "AWS",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude Compat",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude Compat",
     "AWS Key Format": "AWS Key Format",
     "AWS Key Format": "AWS Key Format",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Cohere": "Cohere",
     "Collapse": "Collapse",
     "Collapse": "Collapse",
     "Collapse All": "Collapse All",
     "Collapse All": "Collapse All",
+    "Collect relay latency and success-rate metrics for the model square.": "Collect relay latency and success-rate metrics for the model square.",
     "Color": "Color",
     "Color": "Color",
     "Color is required": "Color is required",
     "Color is required": "Color is required",
     "Color preset": "Color preset",
     "Color preset": "Color preset",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "Enable io.net deployments",
     "Enable io.net deployments": "Enable io.net deployments",
     "Enable io.net model deployment service in console": "Enable io.net model deployment service in console",
     "Enable io.net model deployment service in console": "Enable io.net model deployment service in console",
     "Enable LinuxDO OAuth": "Enable LinuxDO OAuth",
     "Enable LinuxDO OAuth": "Enable LinuxDO OAuth",
+    "Enable model performance metrics": "Enable model performance metrics",
     "Enable OIDC": "Enable OIDC",
     "Enable OIDC": "Enable OIDC",
     "Enable or disable this channel": "Enable or disable this channel",
     "Enable or disable this channel": "Enable or disable this channel",
     "Enable or disable this model": "Enable or disable this model",
     "Enable or disable this model": "Enable or disable this model",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "Fixed price (USD)",
     "Fixed price (USD)": "Fixed price (USD)",
     "Floating": "Floating",
     "Floating": "Floating",
     "FluentRead extension not detected. Please ensure it is installed and active.": "FluentRead extension not detected. Please ensure it is installed and active.",
     "FluentRead extension not detected. Please ensure it is installed and active.": "FluentRead extension not detected. Please ensure it is installed and active.",
+    "Flush interval (minutes)": "Flush interval (minutes)",
     "Follow the guided steps to prepare your workspace before the first login.": "Follow the guided steps to prepare your workspace before the first login.",
     "Follow the guided steps to prepare your workspace before the first login.": "Follow the guided steps to prepare your workspace before the first login.",
     "Footer": "Footer",
     "Footer": "Footer",
     "Footer text displayed at the bottom of pages": "Footer text displayed at the bottom of pages",
     "Footer text displayed at the bottom of pages": "Footer text displayed at the bottom of pages",
@@ -2221,6 +2235,7 @@
     "Model name is required": "Model name is required",
     "Model name is required": "Model name is required",
     "Model names copied to clipboard": "Model names copied to clipboard",
     "Model names copied to clipboard": "Model names copied to clipboard",
     "Model not found": "Model not found",
     "Model not found": "Model not found",
+    "Model performance metrics": "Model performance metrics",
     "Model Price": "Model Price",
     "Model Price": "Model Price",
     "Model Price Not Configured": "Model Price Not Configured",
     "Model Price Not Configured": "Model Price Not Configured",
     "Model Pricing": "Model Pricing",
     "Model Pricing": "Model Pricing",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "No groups match your search",
     "No groups match your search": "No groups match your search",
     "No header overrides configured.": "No header overrides configured.",
     "No header overrides configured.": "No header overrides configured.",
     "No history data available": "No history data available",
     "No history data available": "No history data available",
+    "No incidents in the last 24 hours": "No incidents in the last 24 hours",
     "No incidents in the last 30 days": "No incidents in the last 30 days",
     "No incidents in the last 30 days": "No incidents in the last 30 days",
     "No Inviter": "No Inviter",
     "No Inviter": "No Inviter",
     "No keys found": "No keys found",
     "No keys found": "No keys found",
@@ -3106,6 +3122,8 @@
     "Request Model:": "Request Model:",
     "Request Model:": "Request Model:",
     "Request overrides, routing behavior, and upstream model automation": "Request overrides, routing behavior, and upstream model automation",
     "Request overrides, routing behavior, and upstream model automation": "Request overrides, routing behavior, and upstream model automation",
     "Request rule pricing": "Request rule pricing",
     "Request rule pricing": "Request rule pricing",
+    "Request success rate sampled over the last 24 hours": "Request success rate sampled over the last 24 hours",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Request success rate; {{incidents}} incident buckets in the last 24 hours",
     "Request timed out, please refresh and restart GitHub login": "Request timed out, please refresh and restart GitHub login",
     "Request timed out, please refresh and restart GitHub login": "Request timed out, please refresh and restart GitHub login",
     "Request-based": "Request-based",
     "Request-based": "Request-based",
     "Requests (24h)": "Requests (24h)",
     "Requests (24h)": "Requests (24h)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "Restrict user model request frequency (may impact high concurrency performance)",
     "Restrict user model request frequency (may impact high concurrency performance)": "Restrict user model request frequency (may impact high concurrency performance)",
     "Retain last N days": "Retain last N days",
     "Retain last N days": "Retain last N days",
     "Retain last N files": "Retain last N files",
     "Retain last N files": "Retain last N files",
+    "Retention days": "Retention days",
     "Retry": "Retry",
     "Retry": "Retry",
     "Retry Chain": "Retry Chain",
     "Retry Chain": "Retry Chain",
     "Retry Suggestion": "Retry Suggestion",
     "Retry Suggestion": "Retry Suggestion",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "Subscription Plans",
     "Subscription Plans": "Subscription Plans",
     "Subtract": "Subtract",
     "Subtract": "Subtract",
     "Success": "Success",
     "Success": "Success",
+    "Success rate": "Success rate",
     "Successfully created {{count}} API Key(s)": "Successfully created {{count}} API Key(s)",
     "Successfully created {{count}} API Key(s)": "Successfully created {{count}} API Key(s)",
     "Successfully created {{count}} redemption codes": "Successfully created {{count}} redemption codes",
     "Successfully created {{count}} redemption codes": "Successfully created {{count}} redemption codes",
     "Successfully deleted {{count}} API key(s)": "Successfully deleted {{count}} API key(s)",
     "Successfully deleted {{count}} API key(s)": "Successfully deleted {{count}} API key(s)",

+ 20 - 0
web/default/src/i18n/locales/fr.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "{{count}} canal(canaux) désactivé(s) supprimé(s)",
     "{{count}} disabled channel(s) deleted": "{{count}} canal(canaux) désactivé(s) supprimé(s)",
     "{{count}} hours ago": "il y a {{count}} heures",
     "{{count}} hours ago": "il y a {{count}} heures",
     "{{count}} incidents": "{{count}} incidents",
     "{{count}} incidents": "{{count}} incidents",
+    "{{count}} incidents in the last 24 hours": "{{count}} incidents au cours des dernières 24 heures",
     "{{count}} incidents in the last 30 days": "{{count}} incidents au cours des 30 derniers jours",
     "{{count}} incidents in the last 30 days": "{{count}} incidents au cours des 30 derniers jours",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} log entries removed.": "{{count}} entrées de journal supprimées.",
     "{{count}} log entries removed.": "{{count}} entrées de journal supprimées.",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 Votre entreprise. Tous droits réservés.",
     "© 2025 Your Company. All rights reserved.": "© 2025 Votre entreprise. Tous droits réservés.",
     "+{{count}} more": "+{{count}} de plus",
     "+{{count}} more": "+{{count}} de plus",
     "| Based on": "| Basé sur",
     "| Based on": "| Basé sur",
+    "0 means data is kept permanently": "0 signifie que les données sont conservées indéfiniment",
     "0 means unlimited": "0 signifie illimité",
     "0 means unlimited": "0 signifie illimité",
     "1 Day": "1 jour",
     "1 Day": "1 jour",
     "1 day ago": "Il y a 1 jour",
     "1 day ago": "Il y a 1 jour",
+    "1 hour": "1 heure",
     "1 Hour": "1H",
     "1 Hour": "1H",
     "1 hour ago": "Il y a 1 heure",
     "1 hour ago": "Il y a 1 heure",
+    "1 minute": "1 minute",
     "1 minute ago": "Il y a 1 minute",
     "1 minute ago": "Il y a 1 minute",
     "1 Month": "1M",
     "1 Month": "1M",
     "1 month ago": "Il y a 1 mois",
     "1 month ago": "Il y a 1 mois",
@@ -86,6 +90,7 @@
     "30 Days": "30 jours",
     "30 Days": "30 jours",
     "30 days ago": "Il y a 30 jours",
     "30 days ago": "Il y a 30 jours",
     "30d change": "Variation 30 j",
     "30d change": "Variation 30 j",
+    "5 minutes": "5 minutes",
     "5-Hour Window": "Fenêtre de 5 heures",
     "5-Hour Window": "Fenêtre de 5 heures",
     "50 / page": "50 / page",
     "50 / page": "50 / page",
     "7 Days": "7 jours",
     "7 Days": "7 jours",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "Trafic agrégé par fournisseur de modèle amont",
     "Aggregated traffic by upstream model provider": "Trafic agrégé par fournisseur de modèle amont",
     "Aggregated usage metrics and trend charts.": "Métriques d'utilisation agrégées et graphiques de tendances.",
     "Aggregated usage metrics and trend charts.": "Métriques d'utilisation agrégées et graphiques de tendances.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "agrège plus de 50 fournisseurs IA derrière une API unifiée. Gérez l'accès, suivez les coûts et évoluez sans effort.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "agrège plus de 50 fournisseurs IA derrière une API unifiée. Gérez l'accès, suivez les coûts et évoluez sans effort.",
+    "Aggregation bucket": "Fenêtre d’agrégation",
     "AGPL v3.0 License": "Licence AGPL v3.0",
     "AGPL v3.0 License": "Licence AGPL v3.0",
     "AI model testing environment": "Environnement de test de modèle IA",
     "AI model testing environment": "Environnement de test de modèle IA",
     "AI models": "Modèles d'IA",
     "AI models": "Modèles d'IA",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "Sélectionne automatiquement le meilleur groupe disponible avec un mécanisme de disjoncteur de circuit",
     "Automatically selects the best available group with circuit breaker mechanism": "Sélectionne automatiquement le meilleur groupe disponible avec un mécanisme de disjoncteur de circuit",
     "Automatically sync model list when upstream changes are detected": "Synchroniser automatiquement la liste des modèles lorsque des changements en amont sont détectés",
     "Automatically sync model list when upstream changes are detected": "Synchroniser automatiquement la liste des modèles lorsque des changements en amont sont détectés",
     "Automatically test channels and notify users when limits are hit": "Tester automatiquement les canaux et notifier les utilisateurs lorsque les limites sont atteintes",
     "Automatically test channels and notify users when limits are hit": "Tester automatiquement les canaux et notifier les utilisateurs lorsque les limites sont atteintes",
+    "Availability (last 24h)": "Disponibilité (dernières 24 h)",
     "Available": "Disponible",
     "Available": "Disponible",
     "Available disk space": "Espace disque disponible",
     "Available disk space": "Espace disque disponible",
     "Available Models": "Modèles disponibles",
     "Available Models": "Modèles disponibles",
     "Available Rewards": "Récompenses disponibles",
     "Available Rewards": "Récompenses disponibles",
+    "Average latency": "Latence moyenne",
+    "Average latency, TTFT, and success rate by group": "Latence moyenne, TTFT et taux de réussite par groupe",
     "Average RPM": "RPM moyen",
     "Average RPM": "RPM moyen",
+    "Average time-to-first-token (TTFT) by group": "Temps moyen jusqu’au premier token (TTFT) par groupe",
     "Average tokens per second sustained per group": "Tokens par seconde soutenus en moyenne par groupe",
     "Average tokens per second sustained per group": "Tokens par seconde soutenus en moyenne par groupe",
     "Average TPM": "TPM moyen",
     "Average TPM": "TPM moyen",
+    "Average TTFT": "TTFT moyen",
     "AWS": "AWS",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude Compat",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude Compat",
     "AWS Key Format": "Format de clé AWS",
     "AWS Key Format": "Format de clé AWS",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Cohere": "Cohere",
     "Collapse": "Réduire",
     "Collapse": "Réduire",
     "Collapse All": "Tout réduire",
     "Collapse All": "Tout réduire",
+    "Collect relay latency and success-rate metrics for the model square.": "Collecte les métriques de latence Relay et de taux de réussite pour la place des modèles.",
     "Color": "Couleur",
     "Color": "Couleur",
     "Color is required": "La couleur est requise",
     "Color is required": "La couleur est requise",
     "Color preset": "Préréglage de couleur",
     "Color preset": "Préréglage de couleur",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "Activer les déploiements io.net",
     "Enable io.net deployments": "Activer les déploiements io.net",
     "Enable io.net model deployment service in console": "Activer le service de déploiement de modèles io.net dans la console",
     "Enable io.net model deployment service in console": "Activer le service de déploiement de modèles io.net dans la console",
     "Enable LinuxDO OAuth": "Activer LinuxDO OAuth",
     "Enable LinuxDO OAuth": "Activer LinuxDO OAuth",
+    "Enable model performance metrics": "Activer les indicateurs de performance des modèles",
     "Enable OIDC": "Activer OIDC",
     "Enable OIDC": "Activer OIDC",
     "Enable or disable this channel": "Activer ou désactiver ce canal",
     "Enable or disable this channel": "Activer ou désactiver ce canal",
     "Enable or disable this model": "Activer ou désactiver ce modèle",
     "Enable or disable this model": "Activer ou désactiver ce modèle",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "Prix fixe (USD)",
     "Fixed price (USD)": "Prix fixe (USD)",
     "Floating": "Flottant",
     "Floating": "Flottant",
     "FluentRead extension not detected. Please ensure it is installed and active.": "Extension FluentRead non détectée. Veuillez vous assurer qu'elle est installée et activée.",
     "FluentRead extension not detected. Please ensure it is installed and active.": "Extension FluentRead non détectée. Veuillez vous assurer qu'elle est installée et activée.",
+    "Flush interval (minutes)": "Intervalle d’écriture (minutes)",
     "Follow the guided steps to prepare your workspace before the first login.": "Suivez les étapes guidées pour préparer votre espace de travail avant la première connexion.",
     "Follow the guided steps to prepare your workspace before the first login.": "Suivez les étapes guidées pour préparer votre espace de travail avant la première connexion.",
     "Footer": "Pied de page",
     "Footer": "Pied de page",
     "Footer text displayed at the bottom of pages": "Texte de pied de page affiché en bas des pages",
     "Footer text displayed at the bottom of pages": "Texte de pied de page affiché en bas des pages",
@@ -2221,6 +2235,7 @@
     "Model name is required": "Le nom du modèle est requis",
     "Model name is required": "Le nom du modèle est requis",
     "Model names copied to clipboard": "Noms des modèles copiés dans le presse-papiers",
     "Model names copied to clipboard": "Noms des modèles copiés dans le presse-papiers",
     "Model not found": "Modèle introuvable",
     "Model not found": "Modèle introuvable",
+    "Model performance metrics": "Indicateurs de performance des modèles",
     "Model Price": "Prix du modèle",
     "Model Price": "Prix du modèle",
     "Model Price Not Configured": "Prix du modèle non configuré",
     "Model Price Not Configured": "Prix du modèle non configuré",
     "Model Pricing": "Tarification des modèles",
     "Model Pricing": "Tarification des modèles",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "Aucun groupe ne correspond à votre recherche",
     "No groups match your search": "Aucun groupe ne correspond à votre recherche",
     "No header overrides configured.": "Aucune surcharge d'en-têtes configurée.",
     "No header overrides configured.": "Aucune surcharge d'en-têtes configurée.",
     "No history data available": "Aucune donnée historique disponible",
     "No history data available": "Aucune donnée historique disponible",
+    "No incidents in the last 24 hours": "Aucun incident au cours des dernières 24 heures",
     "No incidents in the last 30 days": "Aucun incident sur les 30 derniers jours",
     "No incidents in the last 30 days": "Aucun incident sur les 30 derniers jours",
     "No Inviter": "Pas d'inviteur",
     "No Inviter": "Pas d'inviteur",
     "No keys found": "Aucune clé trouvée",
     "No keys found": "Aucune clé trouvée",
@@ -3106,6 +3122,8 @@
     "Request Model:": "Modèle demandé :",
     "Request Model:": "Modèle demandé :",
     "Request overrides, routing behavior, and upstream model automation": "Surcharges de requête, comportement de routage et automatisation des modèles amont",
     "Request overrides, routing behavior, and upstream model automation": "Surcharges de requête, comportement de routage et automatisation des modèles amont",
     "Request rule pricing": "Règles de tarification de requête",
     "Request rule pricing": "Règles de tarification de requête",
+    "Request success rate sampled over the last 24 hours": "Taux de réussite des requêtes échantillonné sur les dernières 24 heures",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Taux de réussite des requêtes ; {{incidents}} créneaux avec incident sur les dernières 24 heures",
     "Request timed out, please refresh and restart GitHub login": "Délai dépassé, veuillez actualiser la page puis relancer la connexion GitHub",
     "Request timed out, please refresh and restart GitHub login": "Délai dépassé, veuillez actualiser la page puis relancer la connexion GitHub",
     "Request-based": "Selon la requête",
     "Request-based": "Selon la requête",
     "Requests (24h)": "Requêtes (24 h)",
     "Requests (24h)": "Requêtes (24 h)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "Restreindre la fréquence des requêtes du modèle utilisateur (peut impacter les performances en cas de forte concurrence)",
     "Restrict user model request frequency (may impact high concurrency performance)": "Restreindre la fréquence des requêtes du modèle utilisateur (peut impacter les performances en cas de forte concurrence)",
     "Retain last N days": "Conserver les N derniers jours",
     "Retain last N days": "Conserver les N derniers jours",
     "Retain last N files": "Conserver les N derniers fichiers",
     "Retain last N files": "Conserver les N derniers fichiers",
+    "Retention days": "Jours de rétention",
     "Retry": "Réessayer",
     "Retry": "Réessayer",
     "Retry Chain": "Chaîne de tentatives",
     "Retry Chain": "Chaîne de tentatives",
     "Retry Suggestion": "Suggestion de relance",
     "Retry Suggestion": "Suggestion de relance",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "Plans d'abonnement",
     "Subscription Plans": "Plans d'abonnement",
     "Subtract": "Soustraire",
     "Subtract": "Soustraire",
     "Success": "Succès",
     "Success": "Succès",
+    "Success rate": "Taux de réussite",
     "Successfully created {{count}} API Key(s)": "{{count}} clé(s) API créée(s) avec succès",
     "Successfully created {{count}} API Key(s)": "{{count}} clé(s) API créée(s) avec succès",
     "Successfully created {{count}} redemption codes": "{{count}} codes de réduction créés avec succès",
     "Successfully created {{count}} redemption codes": "{{count}} codes de réduction créés avec succès",
     "Successfully deleted {{count}} API key(s)": "{{count}} clé(s) API supprimée(s) avec succès",
     "Successfully deleted {{count}} API key(s)": "{{count}} clé(s) API supprimée(s) avec succès",

+ 20 - 0
web/default/src/i18n/locales/ja.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "{{count}} 個の無効チャネルを削除しました",
     "{{count}} disabled channel(s) deleted": "{{count}} 個の無効チャネルを削除しました",
     "{{count}} hours ago": "{{count}} 時間前",
     "{{count}} hours ago": "{{count}} 時間前",
     "{{count}} incidents": "{{count}} 件のインシデント",
     "{{count}} incidents": "{{count}} 件のインシデント",
+    "{{count}} incidents in the last 24 hours": "過去 24 時間に {{count}} 件のインシデント",
     "{{count}} incidents in the last 30 days": "過去 30 日間で {{count}} 件のインシデント",
     "{{count}} incidents in the last 30 days": "過去 30 日間で {{count}} 件のインシデント",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} log entries removed.": "{{count}} 件のログエントリを削除しました。",
     "{{count}} log entries removed.": "{{count}} 件のログエントリを削除しました。",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 Your Company. 全著作権所有。",
     "© 2025 Your Company. All rights reserved.": "© 2025 Your Company. 全著作権所有。",
     "+{{count}} more": "他 {{count}} 件",
     "+{{count}} more": "他 {{count}} 件",
     "| Based on": "| に基づく",
     "| Based on": "| に基づく",
+    "0 means data is kept permanently": "0 はデータを永続的に保持することを意味します",
     "0 means unlimited": "0は無制限を意味します",
     "0 means unlimited": "0は無制限を意味します",
     "1 Day": "1日",
     "1 Day": "1日",
     "1 day ago": "1日前",
     "1 day ago": "1日前",
+    "1 hour": "1 時間",
     "1 Hour": "1時間",
     "1 Hour": "1時間",
     "1 hour ago": "1時間前",
     "1 hour ago": "1時間前",
+    "1 minute": "1 分",
     "1 minute ago": "1分前",
     "1 minute ago": "1分前",
     "1 Month": "1ヶ月",
     "1 Month": "1ヶ月",
     "1 month ago": "1ヶ月前",
     "1 month ago": "1ヶ月前",
@@ -86,6 +90,7 @@
     "30 Days": "30日",
     "30 Days": "30日",
     "30 days ago": "30日前",
     "30 days ago": "30日前",
     "30d change": "30日変化",
     "30d change": "30日変化",
+    "5 minutes": "5 分",
     "5-Hour Window": "5時間ウィンドウ",
     "5-Hour Window": "5時間ウィンドウ",
     "50 / page": "50 / ページ",
     "50 / page": "50 / ページ",
     "7 Days": "7日",
     "7 Days": "7日",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "上流モデルプロバイダー別の集計トラフィック",
     "Aggregated traffic by upstream model provider": "上流モデルプロバイダー別の集計トラフィック",
     "Aggregated usage metrics and trend charts.": "集計された使用量メトリクスとトレンドチャート。",
     "Aggregated usage metrics and trend charts.": "集計された使用量メトリクスとトレンドチャート。",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "50以上のAIプロバイダーを統一APIで集約。アクセス管理、コスト追跡、スケーリングを簡単に。",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "50以上のAIプロバイダーを統一APIで集約。アクセス管理、コスト追跡、スケーリングを簡単に。",
+    "Aggregation bucket": "集計バケット",
     "AGPL v3.0 License": "AGPL v3.0ライセンス",
     "AGPL v3.0 License": "AGPL v3.0ライセンス",
     "AI model testing environment": "AIモデルテスト環境",
     "AI model testing environment": "AIモデルテスト環境",
     "AI models": "AIモデル",
     "AI models": "AIモデル",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "回路ブレーカーメカニズム付きで最適な利用可能なグループを自動的に選択",
     "Automatically selects the best available group with circuit breaker mechanism": "回路ブレーカーメカニズム付きで最適な利用可能なグループを自動的に選択",
     "Automatically sync model list when upstream changes are detected": "アップストリームの変更が検出されたときにモデルリストを自動的に同期",
     "Automatically sync model list when upstream changes are detected": "アップストリームの変更が検出されたときにモデルリストを自動的に同期",
     "Automatically test channels and notify users when limits are hit": "チャネルを自動的にテストし、制限に達したときにユーザーに通知する",
     "Automatically test channels and notify users when limits are hit": "チャネルを自動的にテストし、制限に達したときにユーザーに通知する",
+    "Availability (last 24h)": "可用性(過去 24 時間)",
     "Available": "空き",
     "Available": "空き",
     "Available disk space": "利用可能なディスク容量",
     "Available disk space": "利用可能なディスク容量",
     "Available Models": "利用可能なモデル",
     "Available Models": "利用可能なモデル",
     "Available Rewards": "利用可能な報酬",
     "Available Rewards": "利用可能な報酬",
+    "Average latency": "平均レイテンシ",
+    "Average latency, TTFT, and success rate by group": "グループ別の平均レイテンシ、TTFT、成功率",
     "Average RPM": "平均RPM",
     "Average RPM": "平均RPM",
+    "Average time-to-first-token (TTFT) by group": "グループ別の平均 Time to First Token(TTFT)",
     "Average tokens per second sustained per group": "グループごとに持続する平均スループット (tokens/秒)",
     "Average tokens per second sustained per group": "グループごとに持続する平均スループット (tokens/秒)",
     "Average TPM": "平均TPM",
     "Average TPM": "平均TPM",
+    "Average TTFT": "平均 TTFT",
     "AWS": "AWS",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude 互換テンプレート",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude 互換テンプレート",
     "AWS Key Format": "AWSキーフォーマット",
     "AWS Key Format": "AWSキーフォーマット",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Cohere": "Cohere",
     "Collapse": "折りたたむ",
     "Collapse": "折りたたむ",
     "Collapse All": "すべて折りたたむ",
     "Collapse All": "すべて折りたたむ",
+    "Collect relay latency and success-rate metrics for the model square.": "モデル広場向けに Relay のレイテンシと成功率メトリクスを収集します。",
     "Color": "カラー",
     "Color": "カラー",
     "Color is required": "色は必須です",
     "Color is required": "色は必須です",
     "Color preset": "カラープリセット",
     "Color preset": "カラープリセット",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "io.net デプロイを有効化",
     "Enable io.net deployments": "io.net デプロイを有効化",
     "Enable io.net model deployment service in console": "コンソールで io.net モデルデプロイサービスを有効化",
     "Enable io.net model deployment service in console": "コンソールで io.net モデルデプロイサービスを有効化",
     "Enable LinuxDO OAuth": "LinuxDO OAuthを有効にする",
     "Enable LinuxDO OAuth": "LinuxDO OAuthを有効にする",
+    "Enable model performance metrics": "モデル性能メトリクスを有効化",
     "Enable OIDC": "OIDCを有効にする",
     "Enable OIDC": "OIDCを有効にする",
     "Enable or disable this channel": "このチャネルを有効または無効にする",
     "Enable or disable this channel": "このチャネルを有効または無効にする",
     "Enable or disable this model": "このモデルを有効または無効にする",
     "Enable or disable this model": "このモデルを有効または無効にする",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "固定価格 (USD)",
     "Fixed price (USD)": "固定価格 (USD)",
     "Floating": "フローティング",
     "Floating": "フローティング",
     "FluentRead extension not detected. Please ensure it is installed and active.": "FluentRead 拡張機能が検出されませんでした。インストールされていて有効になっていることを確認してください。",
     "FluentRead extension not detected. Please ensure it is installed and active.": "FluentRead 拡張機能が検出されませんでした。インストールされていて有効になっていることを確認してください。",
+    "Flush interval (minutes)": "書き込み間隔(分)",
     "Follow the guided steps to prepare your workspace before the first login.": "初回ログイン前に、ガイド付きの手順に従ってワークスペースを準備してください。",
     "Follow the guided steps to prepare your workspace before the first login.": "初回ログイン前に、ガイド付きの手順に従ってワークスペースを準備してください。",
     "Footer": "フッター",
     "Footer": "フッター",
     "Footer text displayed at the bottom of pages": "ページ下部に表示されるフッターテキスト",
     "Footer text displayed at the bottom of pages": "ページ下部に表示されるフッターテキスト",
@@ -2221,6 +2235,7 @@
     "Model name is required": "モデル名は必須です",
     "Model name is required": "モデル名は必須です",
     "Model names copied to clipboard": "モデル名がクリップボードにコピーされました",
     "Model names copied to clipboard": "モデル名がクリップボードにコピーされました",
     "Model not found": "モデルが見つかりません",
     "Model not found": "モデルが見つかりません",
+    "Model performance metrics": "モデル性能メトリクス",
     "Model Price": "モデル価格",
     "Model Price": "モデル価格",
     "Model Price Not Configured": "モデル価格が未設定",
     "Model Price Not Configured": "モデル価格が未設定",
     "Model Pricing": "モデル料金",
     "Model Pricing": "モデル料金",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "検索に一致するグループがありません",
     "No groups match your search": "検索に一致するグループがありません",
     "No header overrides configured.": "ヘッダーのオーバーライドが設定されていません。",
     "No header overrides configured.": "ヘッダーのオーバーライドが設定されていません。",
     "No history data available": "履歴データがありません",
     "No history data available": "履歴データがありません",
+    "No incidents in the last 24 hours": "過去 24 時間にインシデントはありません",
     "No incidents in the last 30 days": "過去 30 日間でインシデントはありません",
     "No incidents in the last 30 days": "過去 30 日間でインシデントはありません",
     "No Inviter": "招待者なし",
     "No Inviter": "招待者なし",
     "No keys found": "キーが見つかりません",
     "No keys found": "キーが見つかりません",
@@ -3106,6 +3122,8 @@
     "Request Model:": "リクエストモデル:",
     "Request Model:": "リクエストモデル:",
     "Request overrides, routing behavior, and upstream model automation": "リクエスト上書き、ルーティング動作、上流モデル自動化",
     "Request overrides, routing behavior, and upstream model automation": "リクエスト上書き、ルーティング動作、上流モデル自動化",
     "Request rule pricing": "リクエストルールの課金",
     "Request rule pricing": "リクエストルールの課金",
+    "Request success rate sampled over the last 24 hours": "過去 24 時間にサンプリングされたリクエスト成功率",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "リクエスト成功率;過去 24 時間に {{incidents}} 個のインシデント時間枠",
     "Request timed out, please refresh and restart GitHub login": "タイムアウトしました。ページをリロードして GitHub ログインをやり直してください",
     "Request timed out, please refresh and restart GitHub login": "タイムアウトしました。ページをリロードして GitHub ログインをやり直してください",
     "Request-based": "リクエスト条件あり",
     "Request-based": "リクエスト条件あり",
     "Requests (24h)": "リクエスト (24h)",
     "Requests (24h)": "リクエスト (24h)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "ユーザーモデルのリクエスト頻度を制限する(高並行性パフォーマンスに影響を与える可能性があります)",
     "Restrict user model request frequency (may impact high concurrency performance)": "ユーザーモデルのリクエスト頻度を制限する(高並行性パフォーマンスに影響を与える可能性があります)",
     "Retain last N days": "最新N日間を保持",
     "Retain last N days": "最新N日間を保持",
     "Retain last N files": "最新N個のファイルを保持",
     "Retain last N files": "最新N個のファイルを保持",
+    "Retention days": "保持日数",
     "Retry": "再試行",
     "Retry": "再試行",
     "Retry Chain": "リトライチェーン",
     "Retry Chain": "リトライチェーン",
     "Retry Suggestion": "リトライ提案",
     "Retry Suggestion": "リトライ提案",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "サブスクリプションプラン",
     "Subscription Plans": "サブスクリプションプラン",
     "Subtract": "減算",
     "Subtract": "減算",
     "Success": "成功",
     "Success": "成功",
+    "Success rate": "成功率",
     "Successfully created {{count}} API Key(s)": "{{count}}個のAPIキーが正常に作成されました",
     "Successfully created {{count}} API Key(s)": "{{count}}個のAPIキーが正常に作成されました",
     "Successfully created {{count}} redemption codes": "{{count}}件の引き換えコードが正常に作成されました",
     "Successfully created {{count}} redemption codes": "{{count}}件の引き換えコードが正常に作成されました",
     "Successfully deleted {{count}} API key(s)": "{{count}}個のAPIキーが正常に削除されました",
     "Successfully deleted {{count}} API key(s)": "{{count}}個のAPIキーが正常に削除されました",

+ 20 - 0
web/default/src/i18n/locales/ru.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "Удалено {{count}} отключённых каналов",
     "{{count}} disabled channel(s) deleted": "Удалено {{count}} отключённых каналов",
     "{{count}} hours ago": "{{count}} часов назад",
     "{{count}} hours ago": "{{count}} часов назад",
     "{{count}} incidents": "{{count}} инцидентов",
     "{{count}} incidents": "{{count}} инцидентов",
+    "{{count}} incidents in the last 24 hours": "{{count}} инцидентов за последние 24 часа",
     "{{count}} incidents in the last 30 days": "{{count}} инцидентов за последние 30 дней",
     "{{count}} incidents in the last 30 days": "{{count}} инцидентов за последние 30 дней",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} log entries removed.": "Удалено {{count}} записей журнала.",
     "{{count}} log entries removed.": "Удалено {{count}} записей журнала.",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 Ваша Компания. Все права защищены.",
     "© 2025 Your Company. All rights reserved.": "© 2025 Ваша Компания. Все права защищены.",
     "+{{count}} more": "ещё {{count}}",
     "+{{count}} more": "ещё {{count}}",
     "| Based on": "| На основе",
     "| Based on": "| На основе",
+    "0 means data is kept permanently": "0 означает, что данные хранятся постоянно",
     "0 means unlimited": "0 означает без ограничений",
     "0 means unlimited": "0 означает без ограничений",
     "1 Day": "1 день",
     "1 Day": "1 день",
     "1 day ago": "1 день назад",
     "1 day ago": "1 день назад",
+    "1 hour": "1 час",
     "1 Hour": "1 ч.",
     "1 Hour": "1 ч.",
     "1 hour ago": "1 час назад",
     "1 hour ago": "1 час назад",
+    "1 minute": "1 минута",
     "1 minute ago": "1 минуту назад",
     "1 minute ago": "1 минуту назад",
     "1 Month": "1 мес.",
     "1 Month": "1 мес.",
     "1 month ago": "1 месяц назад",
     "1 month ago": "1 месяц назад",
@@ -86,6 +90,7 @@
     "30 Days": "30 дней",
     "30 Days": "30 дней",
     "30 days ago": "30 дней назад",
     "30 days ago": "30 дней назад",
     "30d change": "Изменение за 30 дней",
     "30d change": "Изменение за 30 дней",
+    "5 minutes": "5 минут",
     "5-Hour Window": "5-часовое окно",
     "5-Hour Window": "5-часовое окно",
     "50 / page": "50 / страница",
     "50 / page": "50 / страница",
     "7 Days": "7 дней",
     "7 Days": "7 дней",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "Агрегированный трафик по поставщикам моделей",
     "Aggregated traffic by upstream model provider": "Агрегированный трафик по поставщикам моделей",
     "Aggregated usage metrics and trend charts.": "Агрегированные метрики использования и графики трендов.",
     "Aggregated usage metrics and trend charts.": "Агрегированные метрики использования и графики трендов.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "объединяет 50+ ИИ-провайдеров за единым API. Управляйте доступом, отслеживайте затраты и масштабируйтесь без усилий.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "объединяет 50+ ИИ-провайдеров за единым API. Управляйте доступом, отслеживайте затраты и масштабируйтесь без усилий.",
+    "Aggregation bucket": "Интервал агрегации",
     "AGPL v3.0 License": "Лицензия AGPL v3.0",
     "AGPL v3.0 License": "Лицензия AGPL v3.0",
     "AI model testing environment": "Среда тестирования ИИ моделей",
     "AI model testing environment": "Среда тестирования ИИ моделей",
     "AI models": "Модели ИИ",
     "AI models": "Модели ИИ",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "Автоматически выбирает лучшую доступную группу с механизмом circuit breaker",
     "Automatically selects the best available group with circuit breaker mechanism": "Автоматически выбирает лучшую доступную группу с механизмом circuit breaker",
     "Automatically sync model list when upstream changes are detected": "Автоматически синхронизировать список моделей при обнаружении изменений у провайдера",
     "Automatically sync model list when upstream changes are detected": "Автоматически синхронизировать список моделей при обнаружении изменений у провайдера",
     "Automatically test channels and notify users when limits are hit": "Автоматически тестировать каналы и уведомлять пользователей при достижении лимитов",
     "Automatically test channels and notify users when limits are hit": "Автоматически тестировать каналы и уведомлять пользователей при достижении лимитов",
+    "Availability (last 24h)": "Доступность (последние 24 ч)",
     "Available": "Доступно",
     "Available": "Доступно",
     "Available disk space": "Доступное дисковое пространство",
     "Available disk space": "Доступное дисковое пространство",
     "Available Models": "Доступные модели",
     "Available Models": "Доступные модели",
     "Available Rewards": "Доступные награды",
     "Available Rewards": "Доступные награды",
+    "Average latency": "Средняя задержка",
+    "Average latency, TTFT, and success rate by group": "Средняя задержка, TTFT и доля успешных запросов по группам",
     "Average RPM": "Средний RPM",
     "Average RPM": "Средний RPM",
+    "Average time-to-first-token (TTFT) by group": "Среднее время до первого токена (TTFT) по группам",
     "Average tokens per second sustained per group": "Средняя устойчивая пропускная способность (токенов/с) по группам",
     "Average tokens per second sustained per group": "Средняя устойчивая пропускная способность (токенов/с) по группам",
     "Average TPM": "Средний TPM",
     "Average TPM": "Средний TPM",
+    "Average TTFT": "Средний TTFT",
     "AWS": "AWS",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude совместимость",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude совместимость",
     "AWS Key Format": "Формат ключа AWS",
     "AWS Key Format": "Формат ключа AWS",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Cohere": "Cohere",
     "Collapse": "Свернуть",
     "Collapse": "Свернуть",
     "Collapse All": "Свернуть все",
     "Collapse All": "Свернуть все",
+    "Collect relay latency and success-rate metrics for the model square.": "Собирает метрики задержки Relay и доли успешных запросов для витрины моделей.",
     "Color": "Цвет",
     "Color": "Цвет",
     "Color is required": "Цвет обязателен",
     "Color is required": "Цвет обязателен",
     "Color preset": "Цветовая предустановка",
     "Color preset": "Цветовая предустановка",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "Включить развертывания io.net",
     "Enable io.net deployments": "Включить развертывания io.net",
     "Enable io.net model deployment service in console": "Включить сервис развертывания моделей io.net в консоли",
     "Enable io.net model deployment service in console": "Включить сервис развертывания моделей io.net в консоли",
     "Enable LinuxDO OAuth": "Включить LinuxDO OAuth",
     "Enable LinuxDO OAuth": "Включить LinuxDO OAuth",
+    "Enable model performance metrics": "Включить метрики производительности моделей",
     "Enable OIDC": "Включить OIDC",
     "Enable OIDC": "Включить OIDC",
     "Enable or disable this channel": "Включить или отключить этот канал",
     "Enable or disable this channel": "Включить или отключить этот канал",
     "Enable or disable this model": "Включить или отключить эту модель",
     "Enable or disable this model": "Включить или отключить эту модель",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "Фиксированная цена (USD)",
     "Fixed price (USD)": "Фиксированная цена (USD)",
     "Floating": "Плавающая",
     "Floating": "Плавающая",
     "FluentRead extension not detected. Please ensure it is installed and active.": "Расширение FluentRead не обнаружено. Убедитесь, что оно установлено и активно.",
     "FluentRead extension not detected. Please ensure it is installed and active.": "Расширение FluentRead не обнаружено. Убедитесь, что оно установлено и активно.",
+    "Flush interval (minutes)": "Интервал записи (минуты)",
     "Follow the guided steps to prepare your workspace before the first login.": "Следуйте пошаговым инструкциям, чтобы подготовить рабочее пространство перед первым входом.",
     "Follow the guided steps to prepare your workspace before the first login.": "Следуйте пошаговым инструкциям, чтобы подготовить рабочее пространство перед первым входом.",
     "Footer": "Подвал",
     "Footer": "Подвал",
     "Footer text displayed at the bottom of pages": "Текст нижнего колонтитула, отображаемый внизу страниц",
     "Footer text displayed at the bottom of pages": "Текст нижнего колонтитула, отображаемый внизу страниц",
@@ -2221,6 +2235,7 @@
     "Model name is required": "Название модели обязательно",
     "Model name is required": "Название модели обязательно",
     "Model names copied to clipboard": "Названия моделей скопированы в буфер обмена",
     "Model names copied to clipboard": "Названия моделей скопированы в буфер обмена",
     "Model not found": "Модель не найдена",
     "Model not found": "Модель не найдена",
+    "Model performance metrics": "Метрики производительности моделей",
     "Model Price": "Цена модели",
     "Model Price": "Цена модели",
     "Model Price Not Configured": "Цена модели не настроена",
     "Model Price Not Configured": "Цена модели не настроена",
     "Model Pricing": "Цены на модели",
     "Model Pricing": "Цены на модели",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "Нет групп, соответствующих вашему поиску",
     "No groups match your search": "Нет групп, соответствующих вашему поиску",
     "No header overrides configured.": "Нет настроенных переопределений заголовков.",
     "No header overrides configured.": "Нет настроенных переопределений заголовков.",
     "No history data available": "Исторические данные недоступны",
     "No history data available": "Исторические данные недоступны",
+    "No incidents in the last 24 hours": "За последние 24 часа инцидентов не было",
     "No incidents in the last 30 days": "За последние 30 дней инцидентов не было",
     "No incidents in the last 30 days": "За последние 30 дней инцидентов не было",
     "No Inviter": "Нет пригласившего",
     "No Inviter": "Нет пригласившего",
     "No keys found": "Ключи не найдены",
     "No keys found": "Ключи не найдены",
@@ -3106,6 +3122,8 @@
     "Request Model:": "Модель запроса:",
     "Request Model:": "Модель запроса:",
     "Request overrides, routing behavior, and upstream model automation": "Переопределения запросов, маршрутизация и автоматизация upstream-моделей",
     "Request overrides, routing behavior, and upstream model automation": "Переопределения запросов, маршрутизация и автоматизация upstream-моделей",
     "Request rule pricing": "Правила ценообразования по запросу",
     "Request rule pricing": "Правила ценообразования по запросу",
+    "Request success rate sampled over the last 24 hours": "Доля успешных запросов по выборкам за последние 24 часа",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Доля успешных запросов; {{incidents}} интервалов с инцидентами за последние 24 часа",
     "Request timed out, please refresh and restart GitHub login": "Время ожидания истекло, обновите страницу и снова запустите вход через GitHub",
     "Request timed out, please refresh and restart GitHub login": "Время ожидания истекло, обновите страницу и снова запустите вход через GitHub",
     "Request-based": "Зависит от запроса",
     "Request-based": "Зависит от запроса",
     "Requests (24h)": "Запросы (24 ч)",
     "Requests (24h)": "Запросы (24 ч)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "Ограничить частоту запросов пользователей к моделям (может повлиять на производительность при высокой параллельной нагрузке)",
     "Restrict user model request frequency (may impact high concurrency performance)": "Ограничить частоту запросов пользователей к моделям (может повлиять на производительность при высокой параллельной нагрузке)",
     "Retain last N days": "Хранить последние N дней",
     "Retain last N days": "Хранить последние N дней",
     "Retain last N files": "Хранить последние N файлов",
     "Retain last N files": "Хранить последние N файлов",
+    "Retention days": "Дней хранения",
     "Retry": "Повторить попытку",
     "Retry": "Повторить попытку",
     "Retry Chain": "Цепочка повторов",
     "Retry Chain": "Цепочка повторов",
     "Retry Suggestion": "Рекомендация по повтору",
     "Retry Suggestion": "Рекомендация по повтору",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "Планы подписки",
     "Subscription Plans": "Планы подписки",
     "Subtract": "Вычесть",
     "Subtract": "Вычесть",
     "Success": "Успешно",
     "Success": "Успешно",
+    "Success rate": "Доля успешных запросов",
     "Successfully created {{count}} API Key(s)": "Успешно создано {{count}} API-ключ(а/ей)",
     "Successfully created {{count}} API Key(s)": "Успешно создано {{count}} API-ключ(а/ей)",
     "Successfully created {{count}} redemption codes": "Успешно создано {{count}} кодов активации",
     "Successfully created {{count}} redemption codes": "Успешно создано {{count}} кодов активации",
     "Successfully deleted {{count}} API key(s)": "Успешно удалено {{count}} API-ключ(а/ей)",
     "Successfully deleted {{count}} API key(s)": "Успешно удалено {{count}} API-ключ(а/ей)",

+ 20 - 0
web/default/src/i18n/locales/vi.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "Đã xóa {{count}} kênh đã tắt",
     "{{count}} disabled channel(s) deleted": "Đã xóa {{count}} kênh đã tắt",
     "{{count}} hours ago": "{{count}} giờ trước",
     "{{count}} hours ago": "{{count}} giờ trước",
     "{{count}} incidents": "{{count}} sự cố",
     "{{count}} incidents": "{{count}} sự cố",
+    "{{count}} incidents in the last 24 hours": "{{count}} sự cố trong 24 giờ qua",
     "{{count}} incidents in the last 30 days": "{{count}} sự cố trong 30 ngày qua",
     "{{count}} incidents in the last 30 days": "{{count}} sự cố trong 30 ngày qua",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} IP(s)": "{{count}} IP",
     "{{count}} log entries removed.": "Đã xóa {{count}} mục nhật ký.",
     "{{count}} log entries removed.": "Đã xóa {{count}} mục nhật ký.",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 Công ty của bạn. Mọi quyền được bảo lưu.",
     "© 2025 Your Company. All rights reserved.": "© 2025 Công ty của bạn. Mọi quyền được bảo lưu.",
     "+{{count}} more": "thêm {{count}} mục",
     "+{{count}} more": "thêm {{count}} mục",
     "| Based on": "| Dựa trên",
     "| Based on": "| Dựa trên",
+    "0 means data is kept permanently": "0 nghĩa là dữ liệu được giữ vĩnh viễn",
     "0 means unlimited": "0 có nghĩa là không giới hạn",
     "0 means unlimited": "0 có nghĩa là không giới hạn",
     "1 Day": "1 ngày",
     "1 Day": "1 ngày",
     "1 day ago": "1 ngày trước",
     "1 day ago": "1 ngày trước",
+    "1 hour": "1 giờ",
     "1 Hour": "1 giờ",
     "1 Hour": "1 giờ",
     "1 hour ago": "1 giờ trước",
     "1 hour ago": "1 giờ trước",
+    "1 minute": "1 phút",
     "1 minute ago": "1 phút trước",
     "1 minute ago": "1 phút trước",
     "1 Month": "1 tháng",
     "1 Month": "1 tháng",
     "1 month ago": "1 tháng trước",
     "1 month ago": "1 tháng trước",
@@ -86,6 +90,7 @@
     "30 Days": "30 ngày",
     "30 Days": "30 ngày",
     "30 days ago": "30 ngày trước",
     "30 days ago": "30 ngày trước",
     "30d change": "Thay đổi 30 ngày",
     "30d change": "Thay đổi 30 ngày",
+    "5 minutes": "5 phút",
     "5-Hour Window": "Cửa sổ 5 giờ",
     "5-Hour Window": "Cửa sổ 5 giờ",
     "50 / page": "50 / trang",
     "50 / page": "50 / trang",
     "7 Days": "7 ngày",
     "7 Days": "7 ngày",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "Lưu lượng tổng hợp theo nhà cung cấp mô hình",
     "Aggregated traffic by upstream model provider": "Lưu lượng tổng hợp theo nhà cung cấp mô hình",
     "Aggregated usage metrics and trend charts.": "Chỉ số sử dụng tổng hợp và biểu đồ xu hướng.",
     "Aggregated usage metrics and trend charts.": "Chỉ số sử dụng tổng hợp và biểu đồ xu hướng.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "tổng hợp hơn 50 nhà cung cấp AI sau một API thống nhất. Quản lý truy cập, theo dõi chi phí và mở rộng dễ dàng.",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "tổng hợp hơn 50 nhà cung cấp AI sau một API thống nhất. Quản lý truy cập, theo dõi chi phí và mở rộng dễ dàng.",
+    "Aggregation bucket": "Khoảng tổng hợp",
     "AGPL v3.0 License": "Giấy phép AGPL v3.0",
     "AGPL v3.0 License": "Giấy phép AGPL v3.0",
     "AI model testing environment": "Môi trường thử nghiệm mô hình AI",
     "AI model testing environment": "Môi trường thử nghiệm mô hình AI",
     "AI models": "mô hình AI",
     "AI models": "mô hình AI",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "Tự động chọn nhóm tốt nhất hiện có với cơ chế ngắt mạch",
     "Automatically selects the best available group with circuit breaker mechanism": "Tự động chọn nhóm tốt nhất hiện có với cơ chế ngắt mạch",
     "Automatically sync model list when upstream changes are detected": "Tự động đồng bộ danh sách mô hình khi phát hiện thay đổi từ nguồn",
     "Automatically sync model list when upstream changes are detected": "Tự động đồng bộ danh sách mô hình khi phát hiện thay đổi từ nguồn",
     "Automatically test channels and notify users when limits are hit": "Tự động kiểm tra các kênh và thông báo cho người dùng khi đạt đến giới hạn",
     "Automatically test channels and notify users when limits are hit": "Tự động kiểm tra các kênh và thông báo cho người dùng khi đạt đến giới hạn",
+    "Availability (last 24h)": "Khả dụng (24 giờ qua)",
     "Available": "Khả dụng",
     "Available": "Khả dụng",
     "Available disk space": "Dung lượng đĩa khả dụng",
     "Available disk space": "Dung lượng đĩa khả dụng",
     "Available Models": "Mô hình khả dụng",
     "Available Models": "Mô hình khả dụng",
     "Available Rewards": "Phần thưởng hiện có",
     "Available Rewards": "Phần thưởng hiện có",
+    "Average latency": "Độ trễ trung bình",
+    "Average latency, TTFT, and success rate by group": "Độ trễ trung bình, TTFT và tỷ lệ thành công theo nhóm",
     "Average RPM": "RPM trung bình",
     "Average RPM": "RPM trung bình",
+    "Average time-to-first-token (TTFT) by group": "Thời gian trung bình tới token đầu tiên (TTFT) theo nhóm",
     "Average tokens per second sustained per group": "Số token mỗi giây trung bình duy trì cho từng nhóm",
     "Average tokens per second sustained per group": "Số token mỗi giây trung bình duy trì cho từng nhóm",
     "Average TPM": "TPM trung bình",
     "Average TPM": "TPM trung bình",
+    "Average TTFT": "TTFT trung bình",
     "AWS": "AWS",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude tương thích",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude tương thích",
     "AWS Key Format": "Định dạng khóa AWS",
     "AWS Key Format": "Định dạng khóa AWS",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Cohere": "Cohere",
     "Collapse": "Thu gọn",
     "Collapse": "Thu gọn",
     "Collapse All": "Thu gọn tất cả",
     "Collapse All": "Thu gọn tất cả",
+    "Collect relay latency and success-rate metrics for the model square.": "Thu thập độ trễ Relay và tỷ lệ thành công cho quảng trường mô hình.",
     "Color": "Màu",
     "Color": "Màu",
     "Color is required": "Màu sắc là bắt buộc",
     "Color is required": "Màu sắc là bắt buộc",
     "Color preset": "Cài đặt màu sẵn",
     "Color preset": "Cài đặt màu sẵn",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "Bật triển khai io.net",
     "Enable io.net deployments": "Bật triển khai io.net",
     "Enable io.net model deployment service in console": "Bật dịch vụ triển khai mô hình io.net trong bảng điều khiển",
     "Enable io.net model deployment service in console": "Bật dịch vụ triển khai mô hình io.net trong bảng điều khiển",
     "Enable LinuxDO OAuth": "Bật LinuxDO OAuth",
     "Enable LinuxDO OAuth": "Bật LinuxDO OAuth",
+    "Enable model performance metrics": "Bật chỉ số hiệu năng mô hình",
     "Enable OIDC": "Bật OIDC",
     "Enable OIDC": "Bật OIDC",
     "Enable or disable this channel": "Bật hoặc tắt kênh này",
     "Enable or disable this channel": "Bật hoặc tắt kênh này",
     "Enable or disable this model": "Bật hoặc tắt mô hình này",
     "Enable or disable this model": "Bật hoặc tắt mô hình này",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "Giá cố định (USD)",
     "Fixed price (USD)": "Giá cố định (USD)",
     "Floating": "Nổi",
     "Floating": "Nổi",
     "FluentRead extension not detected. Please ensure it is installed and active.": "Không phát hiện tiện ích mở rộng FluentRead. Vui lòng đảm bảo nó đã được cài đặt và kích hoạt.",
     "FluentRead extension not detected. Please ensure it is installed and active.": "Không phát hiện tiện ích mở rộng FluentRead. Vui lòng đảm bảo nó đã được cài đặt và kích hoạt.",
+    "Flush interval (minutes)": "Khoảng ghi xuống DB (phút)",
     "Follow the guided steps to prepare your workspace before the first login.": "Thực hiện theo các bước hướng dẫn để chuẩn bị không gian làm việc của bạn trước lần đăng nhập đầu tiên.",
     "Follow the guided steps to prepare your workspace before the first login.": "Thực hiện theo các bước hướng dẫn để chuẩn bị không gian làm việc của bạn trước lần đăng nhập đầu tiên.",
     "Footer": "Chân trang",
     "Footer": "Chân trang",
     "Footer text displayed at the bottom of pages": "Văn bản chân trang hiển thị ở cuối các trang",
     "Footer text displayed at the bottom of pages": "Văn bản chân trang hiển thị ở cuối các trang",
@@ -2221,6 +2235,7 @@
     "Model name is required": "Tên mô hình là bắt buộc",
     "Model name is required": "Tên mô hình là bắt buộc",
     "Model names copied to clipboard": "Tên mô hình đã được sao chép vào bộ nhớ tạm",
     "Model names copied to clipboard": "Tên mô hình đã được sao chép vào bộ nhớ tạm",
     "Model not found": "Không tìm thấy mô hình",
     "Model not found": "Không tìm thấy mô hình",
+    "Model performance metrics": "Chỉ số hiệu năng mô hình",
     "Model Price": "Giá mô hình",
     "Model Price": "Giá mô hình",
     "Model Price Not Configured": "Giá mô hình chưa được cấu hình",
     "Model Price Not Configured": "Giá mô hình chưa được cấu hình",
     "Model Pricing": "Bảng giá mô hình",
     "Model Pricing": "Bảng giá mô hình",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "Không có nhóm nào khớp với tìm kiếm của bạn",
     "No groups match your search": "Không có nhóm nào khớp với tìm kiếm của bạn",
     "No header overrides configured.": "Không có ghi đè tiêu đề nào được cấu hình.",
     "No header overrides configured.": "Không có ghi đè tiêu đề nào được cấu hình.",
     "No history data available": "Không có dữ liệu lịch sử",
     "No history data available": "Không có dữ liệu lịch sử",
+    "No incidents in the last 24 hours": "Không có sự cố trong 24 giờ qua",
     "No incidents in the last 30 days": "Không có sự cố trong 30 ngày qua",
     "No incidents in the last 30 days": "Không có sự cố trong 30 ngày qua",
     "No Inviter": "Không có người mời",
     "No Inviter": "Không có người mời",
     "No keys found": "Không tìm thấy khóa",
     "No keys found": "Không tìm thấy khóa",
@@ -3106,6 +3122,8 @@
     "Request Model:": "Mô hình yêu cầu:",
     "Request Model:": "Mô hình yêu cầu:",
     "Request overrides, routing behavior, and upstream model automation": "Ghi đè yêu cầu, hành vi định tuyến và tự động hóa mô hình upstream",
     "Request overrides, routing behavior, and upstream model automation": "Ghi đè yêu cầu, hành vi định tuyến và tự động hóa mô hình upstream",
     "Request rule pricing": "Quy tắc tính giá theo request",
     "Request rule pricing": "Quy tắc tính giá theo request",
+    "Request success rate sampled over the last 24 hours": "Tỷ lệ yêu cầu thành công được lấy mẫu trong 24 giờ qua",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "Tỷ lệ yêu cầu thành công; {{incidents}} khoảng có sự cố trong 24 giờ qua",
     "Request timed out, please refresh and restart GitHub login": "Yêu cầu đã hết thời gian chờ, vui lòng làm mới và đăng nhập lại GitHub",
     "Request timed out, please refresh and restart GitHub login": "Yêu cầu đã hết thời gian chờ, vui lòng làm mới và đăng nhập lại GitHub",
     "Request-based": "Theo yêu cầu",
     "Request-based": "Theo yêu cầu",
     "Requests (24h)": "Yêu cầu (24h)",
     "Requests (24h)": "Yêu cầu (24h)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "Hạn chế tần suất yêu cầu mô hình người dùng (có thể ảnh hưởng đến hiệu suất khi có độ đồng thời cao)",
     "Restrict user model request frequency (may impact high concurrency performance)": "Hạn chế tần suất yêu cầu mô hình người dùng (có thể ảnh hưởng đến hiệu suất khi có độ đồng thời cao)",
     "Retain last N days": "Giữ lại N ngày gần nhất",
     "Retain last N days": "Giữ lại N ngày gần nhất",
     "Retain last N files": "Giữ lại N tệp gần nhất",
     "Retain last N files": "Giữ lại N tệp gần nhất",
+    "Retention days": "Số ngày lưu giữ",
     "Retry": "Thử lại",
     "Retry": "Thử lại",
     "Retry Chain": "Chuỗi thử lại",
     "Retry Chain": "Chuỗi thử lại",
     "Retry Suggestion": "Gợi ý thử lại",
     "Retry Suggestion": "Gợi ý thử lại",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "Gói đăng ký",
     "Subscription Plans": "Gói đăng ký",
     "Subtract": "Trừ",
     "Subtract": "Trừ",
     "Success": "Thành công",
     "Success": "Thành công",
+    "Success rate": "Tỷ lệ thành công",
     "Successfully created {{count}} API Key(s)": "Đã tạo thành công {{count}} khóa API",
     "Successfully created {{count}} API Key(s)": "Đã tạo thành công {{count}} khóa API",
     "Successfully created {{count}} redemption codes": "Đã tạo thành công {{count}} mã đổi thưởng",
     "Successfully created {{count}} redemption codes": "Đã tạo thành công {{count}} mã đổi thưởng",
     "Successfully deleted {{count}} API key(s)": "Đã xóa thành công {{count}} khóa API",
     "Successfully deleted {{count}} API key(s)": "Đã xóa thành công {{count}} khóa API",

+ 20 - 0
web/default/src/i18n/locales/zh.json

@@ -29,6 +29,7 @@
     "{{count}} disabled channel(s) deleted": "已删除 {{count}} 个已禁用的渠道",
     "{{count}} disabled channel(s) deleted": "已删除 {{count}} 个已禁用的渠道",
     "{{count}} hours ago": "{{count}} 小时前",
     "{{count}} hours ago": "{{count}} 小时前",
     "{{count}} incidents": "{{count}} 起事件",
     "{{count}} incidents": "{{count}} 起事件",
+    "{{count}} incidents in the last 24 hours": "最近 24 小时 {{count}} 个异常桶",
     "{{count}} incidents in the last 30 days": "最近 30 天 {{count}} 起事件",
     "{{count}} incidents in the last 30 days": "最近 30 天 {{count}} 起事件",
     "{{count}} IP(s)": "{{count}} 个 IP",
     "{{count}} IP(s)": "{{count}} 个 IP",
     "{{count}} log entries removed.": "已删除 {{count}} 条日志。",
     "{{count}} log entries removed.": "已删除 {{count}} 条日志。",
@@ -59,11 +60,14 @@
     "© 2025 Your Company. All rights reserved.": "© 2025 您的公司。保留所有权利。",
     "© 2025 Your Company. All rights reserved.": "© 2025 您的公司。保留所有权利。",
     "+{{count}} more": "还有 {{count}} 项",
     "+{{count}} more": "还有 {{count}} 项",
     "| Based on": "| 基于",
     "| Based on": "| 基于",
+    "0 means data is kept permanently": "0 表示永久保留数据",
     "0 means unlimited": "0 表示不限",
     "0 means unlimited": "0 表示不限",
     "1 Day": "1 天",
     "1 Day": "1 天",
     "1 day ago": "1 天前",
     "1 day ago": "1 天前",
+    "1 hour": "1 小时",
     "1 Hour": "1 小时",
     "1 Hour": "1 小时",
     "1 hour ago": "1 小时前",
     "1 hour ago": "1 小时前",
+    "1 minute": "1 分钟",
     "1 minute ago": "1 分钟前",
     "1 minute ago": "1 分钟前",
     "1 Month": "1 个月",
     "1 Month": "1 个月",
     "1 month ago": "1 个月前",
     "1 month ago": "1 个月前",
@@ -86,6 +90,7 @@
     "30 Days": "30 天",
     "30 Days": "30 天",
     "30 days ago": "30 天前",
     "30 days ago": "30 天前",
     "30d change": "30 天变化",
     "30d change": "30 天变化",
+    "5 minutes": "5 分钟",
     "5-Hour Window": "5小时窗口",
     "5-Hour Window": "5小时窗口",
     "50 / page": "50 条/页",
     "50 / page": "50 条/页",
     "7 Days": "7 天",
     "7 Days": "7 天",
@@ -218,6 +223,7 @@
     "Aggregated traffic by upstream model provider": "按上游模型提供商聚合的流量",
     "Aggregated traffic by upstream model provider": "按上游模型提供商聚合的流量",
     "Aggregated usage metrics and trend charts.": "聚合使用指标和趋势图表。",
     "Aggregated usage metrics and trend charts.": "聚合使用指标和趋势图表。",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "聚合 50+ AI 提供商于统一 API 之后。轻松管理访问、追踪成本、弹性扩展。",
     "aggregates 50+ AI providers behind one unified API. Manage access, track costs, and scale effortlessly.": "聚合 50+ AI 提供商于统一 API 之后。轻松管理访问、追踪成本、弹性扩展。",
+    "Aggregation bucket": "聚合时间桶",
     "AGPL v3.0 License": "AGPL v3.0 协议",
     "AGPL v3.0 License": "AGPL v3.0 协议",
     "AI model testing environment": "AI模型测试环境",
     "AI model testing environment": "AI模型测试环境",
     "AI models": "AI 模型",
     "AI models": "AI 模型",
@@ -423,13 +429,18 @@
     "Automatically selects the best available group with circuit breaker mechanism": "自动选择可用分组,失败时触发熔断切换",
     "Automatically selects the best available group with circuit breaker mechanism": "自动选择可用分组,失败时触发熔断切换",
     "Automatically sync model list when upstream changes are detected": "检测到上游模型变更时自动同步模型列表",
     "Automatically sync model list when upstream changes are detected": "检测到上游模型变更时自动同步模型列表",
     "Automatically test channels and notify users when limits are hit": "自动测试渠道并在达到限制时通知用户",
     "Automatically test channels and notify users when limits are hit": "自动测试渠道并在达到限制时通知用户",
+    "Availability (last 24h)": "可用率(最近 24 小时)",
     "Available": "可用",
     "Available": "可用",
     "Available disk space": "可用磁盘空间",
     "Available disk space": "可用磁盘空间",
     "Available Models": "可用模型",
     "Available Models": "可用模型",
     "Available Rewards": "可用奖励",
     "Available Rewards": "可用奖励",
+    "Average latency": "平均延迟",
+    "Average latency, TTFT, and success rate by group": "各分组的平均延迟、首 Token 延迟和成功率",
     "Average RPM": "平均 RPM",
     "Average RPM": "平均 RPM",
+    "Average time-to-first-token (TTFT) by group": "各分组的平均首 Token 延迟(TTFT)",
     "Average tokens per second sustained per group": "各分组持续输出的平均每秒 token 数",
     "Average tokens per second sustained per group": "各分组持续输出的平均每秒 token 数",
     "Average TPM": "平均 TPM",
     "Average TPM": "平均 TPM",
+    "Average TTFT": "平均首 Token 延迟",
     "AWS": "AWS",
     "AWS": "AWS",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude 兼容模板",
     "AWS Bedrock Claude Compat": "AWS Bedrock Claude 兼容模板",
     "AWS Key Format": "AWS 密钥格式",
     "AWS Key Format": "AWS 密钥格式",
@@ -710,6 +721,7 @@
     "Cohere": "Cohere",
     "Cohere": "Cohere",
     "Collapse": "收起",
     "Collapse": "收起",
     "Collapse All": "全部收起",
     "Collapse All": "全部收起",
+    "Collect relay latency and success-rate metrics for the model square.": "收集 Relay 延迟和成功率指标,用于模型广场展示。",
     "Color": "颜色",
     "Color": "颜色",
     "Color is required": "颜色为必填项",
     "Color is required": "颜色为必填项",
     "Color preset": "颜色预设",
     "Color preset": "颜色预设",
@@ -1294,6 +1306,7 @@
     "Enable io.net deployments": "启用 io.net 部署",
     "Enable io.net deployments": "启用 io.net 部署",
     "Enable io.net model deployment service in console": "在控制台启用 io.net 模型部署服务",
     "Enable io.net model deployment service in console": "在控制台启用 io.net 模型部署服务",
     "Enable LinuxDO OAuth": "启用 LinuxDO OAuth",
     "Enable LinuxDO OAuth": "启用 LinuxDO OAuth",
+    "Enable model performance metrics": "启用模型性能指标",
     "Enable OIDC": "启用 OIDC",
     "Enable OIDC": "启用 OIDC",
     "Enable or disable this channel": "启用或禁用此渠道",
     "Enable or disable this channel": "启用或禁用此渠道",
     "Enable or disable this model": "启用或禁用此模型",
     "Enable or disable this model": "启用或禁用此模型",
@@ -1659,6 +1672,7 @@
     "Fixed price (USD)": "固定价格 (USD)",
     "Fixed price (USD)": "固定价格 (USD)",
     "Floating": "浮动",
     "Floating": "浮动",
     "FluentRead extension not detected. Please ensure it is installed and active.": "未检测到 FluentRead 扩展。请确保已安装并激活。",
     "FluentRead extension not detected. Please ensure it is installed and active.": "未检测到 FluentRead 扩展。请确保已安装并激活。",
+    "Flush interval (minutes)": "刷库间隔(分钟)",
     "Follow the guided steps to prepare your workspace before the first login.": "请按照引导步骤在首次登录前准备您的工作区。",
     "Follow the guided steps to prepare your workspace before the first login.": "请按照引导步骤在首次登录前准备您的工作区。",
     "Footer": "页脚",
     "Footer": "页脚",
     "Footer text displayed at the bottom of pages": "显示在页面底部的页脚文本",
     "Footer text displayed at the bottom of pages": "显示在页面底部的页脚文本",
@@ -2221,6 +2235,7 @@
     "Model name is required": "模型名称为必填项",
     "Model name is required": "模型名称为必填项",
     "Model names copied to clipboard": "模型名称已复制到剪贴板",
     "Model names copied to clipboard": "模型名称已复制到剪贴板",
     "Model not found": "模型未找到",
     "Model not found": "模型未找到",
+    "Model performance metrics": "模型性能指标",
     "Model Price": "模型价格",
     "Model Price": "模型价格",
     "Model Price Not Configured": "模型价格未配置",
     "Model Price Not Configured": "模型价格未配置",
     "Model Pricing": "模型定价",
     "Model Pricing": "模型定价",
@@ -2396,6 +2411,7 @@
     "No groups match your search": "没有组匹配您的搜索",
     "No groups match your search": "没有组匹配您的搜索",
     "No header overrides configured.": "未配置标头覆盖。",
     "No header overrides configured.": "未配置标头覆盖。",
     "No history data available": "暂无历史数据",
     "No history data available": "暂无历史数据",
+    "No incidents in the last 24 hours": "最近 24 小时无异常",
     "No incidents in the last 30 days": "最近 30 天无事件",
     "No incidents in the last 30 days": "最近 30 天无事件",
     "No Inviter": "无邀请人",
     "No Inviter": "无邀请人",
     "No keys found": "未找到密钥",
     "No keys found": "未找到密钥",
@@ -3106,6 +3122,8 @@
     "Request Model:": "请求模型:",
     "Request Model:": "请求模型:",
     "Request overrides, routing behavior, and upstream model automation": "请求覆盖、路由行为和上游模型自动化",
     "Request overrides, routing behavior, and upstream model automation": "请求覆盖、路由行为和上游模型自动化",
     "Request rule pricing": "请求规则计费",
     "Request rule pricing": "请求规则计费",
+    "Request success rate sampled over the last 24 hours": "最近 24 小时按时间桶采样的请求成功率",
+    "Request success rate; {{incidents}} incident buckets in the last 24 hours": "请求成功率;最近 24 小时 {{incidents}} 个异常桶",
     "Request timed out, please refresh and restart GitHub login": "请求超时,请刷新页面后重新发起 GitHub 登录",
     "Request timed out, please refresh and restart GitHub login": "请求超时,请刷新页面后重新发起 GitHub 登录",
     "Request-based": "含请求条件",
     "Request-based": "含请求条件",
     "Requests (24h)": "请求数(24 小时)",
     "Requests (24h)": "请求数(24 小时)",
@@ -3154,6 +3172,7 @@
     "Restrict user model request frequency (may impact high concurrency performance)": "限制用户模型请求频率(可能会影响高并发性能)",
     "Restrict user model request frequency (may impact high concurrency performance)": "限制用户模型请求频率(可能会影响高并发性能)",
     "Retain last N days": "保留最近N天",
     "Retain last N days": "保留最近N天",
     "Retain last N files": "保留最近 N 个文件",
     "Retain last N files": "保留最近 N 个文件",
+    "Retention days": "保留天数",
     "Retry": "重试",
     "Retry": "重试",
     "Retry Chain": "重试链路",
     "Retry Chain": "重试链路",
     "Retry Suggestion": "重试建议",
     "Retry Suggestion": "重试建议",
@@ -3527,6 +3546,7 @@
     "Subscription Plans": "订阅套餐",
     "Subscription Plans": "订阅套餐",
     "Subtract": "减少",
     "Subtract": "减少",
     "Success": "成功",
     "Success": "成功",
+    "Success rate": "成功率",
     "Successfully created {{count}} API Key(s)": "成功创建了 {{count}} 个 API 密钥",
     "Successfully created {{count}} API Key(s)": "成功创建了 {{count}} 个 API 密钥",
     "Successfully created {{count}} redemption codes": "成功创建了 {{count}} 个兑换码",
     "Successfully created {{count}} redemption codes": "成功创建了 {{count}} 个兑换码",
     "Successfully deleted {{count}} API key(s)": "成功删除了 {{count}} 个 API 密钥",
     "Successfully deleted {{count}} API key(s)": "成功删除了 {{count}} 个 API 密钥",