perf_metric.go 3.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. package model
  2. import (
  3. "time"
  4. "gorm.io/gorm"
  5. "gorm.io/gorm/clause"
  6. )
  7. // PerfMetric stores aggregated relay performance metrics for the model square.
  8. type PerfMetric struct {
  9. Id int `json:"id" gorm:"primaryKey"`
  10. ModelName string `json:"model_name" gorm:"size:128;uniqueIndex:idx_perf_model_group_bucket,priority:1"`
  11. Group string `json:"group" gorm:"column:group;size:64;uniqueIndex:idx_perf_model_group_bucket,priority:2"`
  12. BucketTs int64 `json:"bucket_ts" gorm:"uniqueIndex:idx_perf_model_group_bucket,priority:3;index:idx_perf_bucket_ts"`
  13. RequestCount int64 `json:"-" gorm:"default:0"`
  14. SuccessCount int64 `json:"-" gorm:"default:0"`
  15. TotalLatencyMs int64 `json:"-" gorm:"default:0"`
  16. TtftSumMs int64 `json:"-" gorm:"default:0"`
  17. TtftCount int64 `json:"-" gorm:"default:0"`
  18. OutputTokens int64 `json:"-" gorm:"default:0"`
  19. GenerationMs int64 `json:"-" gorm:"default:0"`
  20. }
  21. func (PerfMetric) TableName() string {
  22. return "perf_metrics"
  23. }
  24. func UpsertPerfMetric(metric *PerfMetric) error {
  25. if metric == nil || metric.RequestCount == 0 {
  26. return nil
  27. }
  28. return DB.Clauses(clause.OnConflict{
  29. Columns: []clause.Column{
  30. {Name: "model_name"},
  31. {Name: "group"},
  32. {Name: "bucket_ts"},
  33. },
  34. DoUpdates: clause.Assignments(map[string]interface{}{
  35. "request_count": gorm.Expr("request_count + ?", metric.RequestCount),
  36. "success_count": gorm.Expr("success_count + ?", metric.SuccessCount),
  37. "total_latency_ms": gorm.Expr("total_latency_ms + ?", metric.TotalLatencyMs),
  38. "ttft_sum_ms": gorm.Expr("ttft_sum_ms + ?", metric.TtftSumMs),
  39. "ttft_count": gorm.Expr("ttft_count + ?", metric.TtftCount),
  40. "output_tokens": gorm.Expr("output_tokens + ?", metric.OutputTokens),
  41. "generation_ms": gorm.Expr("generation_ms + ?", metric.GenerationMs),
  42. }),
  43. }).Create(metric).Error
  44. }
  45. func GetPerfMetrics(modelName string, group string, startTs int64, endTs int64) ([]PerfMetric, error) {
  46. var metrics []PerfMetric
  47. query := DB.Model(&PerfMetric{}).
  48. Where("model_name = ? AND bucket_ts >= ? AND bucket_ts <= ?", modelName, startTs, endTs)
  49. if group != "" {
  50. query = query.Where(commonGroupCol+" = ?", group)
  51. }
  52. err := query.Order("bucket_ts ASC").Find(&metrics).Error
  53. return metrics, err
  54. }
  55. type PerfMetricSummary struct {
  56. ModelName string `json:"model_name"`
  57. RequestCount int64 `json:"request_count"`
  58. SuccessCount int64 `json:"success_count"`
  59. TotalLatencyMs int64 `json:"total_latency_ms"`
  60. OutputTokens int64 `json:"output_tokens"`
  61. GenerationMs int64 `json:"generation_ms"`
  62. }
  63. func GetPerfMetricsSummaryAll(startTs int64, endTs int64) ([]PerfMetricSummary, error) {
  64. var summaries []PerfMetricSummary
  65. err := DB.Model(&PerfMetric{}).
  66. Select("model_name, SUM(request_count) as request_count, SUM(success_count) as success_count, SUM(total_latency_ms) as total_latency_ms, SUM(output_tokens) as output_tokens, SUM(generation_ms) as generation_ms").
  67. Where("bucket_ts >= ? AND bucket_ts <= ?", startTs, endTs).
  68. Group("model_name").
  69. Having("SUM(request_count) > 0").
  70. Find(&summaries).Error
  71. return summaries, err
  72. }
  73. func DeletePerfMetricsBefore(cutoffTs int64) error {
  74. if cutoffTs <= 0 {
  75. return nil
  76. }
  77. return DB.Where("bucket_ts < ?", cutoffTs).Delete(&PerfMetric{}).Error
  78. }
  79. func PerfMetricStartTime(hours int) int64 {
  80. if hours <= 0 {
  81. hours = 24
  82. }
  83. return time.Now().Add(-time.Duration(hours) * time.Hour).Unix()
  84. }