package model

import (
	"time"

	"gorm.io/gorm"
	"gorm.io/gorm/clause"
)

// PerfMetric stores aggregated relay performance metrics for the model square.
//
// A row is unique per (ModelName, Group, BucketTs) through the composite
// unique index idx_perf_model_group_bucket. BucketTs additionally carries its
// own index, idx_perf_bucket_ts. All counter fields are hidden from JSON
// output and default to 0 in the database.
type PerfMetric struct {
	Id             int    `json:"id" gorm:"primaryKey"`
	ModelName      string `json:"model_name" gorm:"size:128;uniqueIndex:idx_perf_model_group_bucket,priority:1"`
	Group          string `json:"group" gorm:"column:group;size:64;uniqueIndex:idx_perf_model_group_bucket,priority:2"`
	BucketTs       int64  `json:"bucket_ts" gorm:"uniqueIndex:idx_perf_model_group_bucket,priority:3;index:idx_perf_bucket_ts"`
	RequestCount   int64  `json:"-" gorm:"default:0"`
	SuccessCount   int64  `json:"-" gorm:"default:0"`
	TotalLatencyMs int64  `json:"-" gorm:"default:0"`
	TtftSumMs      int64  `json:"-" gorm:"default:0"`
	TtftCount      int64  `json:"-" gorm:"default:0"`
	OutputTokens   int64  `json:"-" gorm:"default:0"`
	GenerationMs   int64  `json:"-" gorm:"default:0"`
}

// TableName returns the database table backing PerfMetric.
func (PerfMetric) TableName() string {
	return "perf_metrics"
}

// UpsertPerfMetric inserts metric, or, when a row with the same
// (model_name, group, bucket_ts) already exists, adds metric's counters to the
// stored ones.
//
// A nil metric or one whose RequestCount is 0 is ignored and nil is returned.
// Otherwise the error reported by the database call is returned.
func UpsertPerfMetric(metric *PerfMetric) error {
	if metric == nil || metric.RequestCount == 0 {
		return nil
	}

	// Qualify the existing-row column with the table name. PostgreSQL exposes
	// both the target row and the EXCLUDED pseudo-table inside
	// ON CONFLICT DO UPDATE, so a bare column name is rejected as ambiguous.
	// MySQL and SQLite accept the qualified form as well.
	table := PerfMetric{}.TableName()
	increment := func(column string, delta int64) clause.Expr {
		return gorm.Expr(table+"."+column+" + ?", delta)
	}

	onConflict := clause.OnConflict{
		Columns: []clause.Column{
			{Name: "model_name"},
			{Name: "group"},
			{Name: "bucket_ts"},
		},
		DoUpdates: clause.Assignments(map[string]interface{}{
			"request_count":    increment("request_count", metric.RequestCount),
			"success_count":    increment("success_count", metric.SuccessCount),
			"total_latency_ms": increment("total_latency_ms", metric.TotalLatencyMs),
			"ttft_sum_ms":      increment("ttft_sum_ms", metric.TtftSumMs),
			"ttft_count":       increment("ttft_count", metric.TtftCount),
			"output_tokens":    increment("output_tokens", metric.OutputTokens),
			"generation_ms":    increment("generation_ms", metric.GenerationMs),
		}),
	}
	return DB.Clauses(onConflict).Create(metric).Error
}

// GetPerfMetrics returns the rows for modelName whose bucket_ts lies in the
// inclusive range [startTs, endTs], ordered by bucket_ts ascending.
//
// When group is non-empty the result is further restricted to that group;
// an empty group applies no group filter.
func GetPerfMetrics(modelName string, group string, startTs int64, endTs int64) ([]PerfMetric, error) {
	var metrics []PerfMetric

	query := DB.Model(&PerfMetric{}).
		Where("model_name = ? AND bucket_ts >= ? AND bucket_ts <= ?", modelName, startTs, endTs)
	if group != "" {
		// commonGroupCol is declared elsewhere in the package; presumably it is
		// the dialect-specific quoted name of the group column — confirm there.
		query = query.Where(commonGroupCol+" = ?", group)
	}

	err := query.Order("bucket_ts ASC").Find(&metrics).Error
	return metrics, err
}

// PerfMetricSummary is the per-model total of selected PerfMetric counters, as
// produced by GetPerfMetricsSummaryAll.
type PerfMetricSummary struct {
	ModelName      string `json:"model_name"`
	RequestCount   int64  `json:"request_count"`
	SuccessCount   int64  `json:"success_count"`
	TotalLatencyMs int64  `json:"total_latency_ms"`
	OutputTokens   int64  `json:"output_tokens"`
	GenerationMs   int64  `json:"generation_ms"`
}

// GetPerfMetricsSummaryAll sums the counters of every row whose bucket_ts lies
// in the inclusive range [startTs, endTs], grouped by model_name.
//
// Models whose summed request_count is not greater than 0 are omitted.
func GetPerfMetricsSummaryAll(startTs int64, endTs int64) ([]PerfMetricSummary, error) {
	var summaries []PerfMetricSummary

	err := DB.Model(&PerfMetric{}).
		Select("model_name, SUM(request_count) as request_count, SUM(success_count) as success_count, SUM(total_latency_ms) as total_latency_ms, SUM(output_tokens) as output_tokens, SUM(generation_ms) as generation_ms").
		Where("bucket_ts >= ? AND bucket_ts <= ?", startTs, endTs).
		Group("model_name").
		Having("SUM(request_count) > 0").
		Find(&summaries).Error
	return summaries, err
}

// DeletePerfMetricsBefore deletes every row whose bucket_ts is strictly less
// than cutoffTs. A cutoffTs of 0 or less is a no-op that returns nil.
func DeletePerfMetricsBefore(cutoffTs int64) error {
	if cutoffTs <= 0 {
		return nil
	}
	return DB.Where("bucket_ts < ?", cutoffTs).Delete(&PerfMetric{}).Error
}

// PerfMetricStartTime returns the Unix timestamp, in seconds, of the instant
// hours before now. A value of hours that is 0 or negative is treated as 24.
func PerfMetricStartTime(hours int) int64 {
	if hours <= 0 {
		hours = 24
	}
	return time.Now().Add(-time.Duration(hours) * time.Hour).Unix()
}