types.go 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. package perfmetrics
  2. import "sync/atomic"
  3. type Store interface {
  4. Record(sample Sample)
  5. Query(params QueryParams) (QueryResult, error)
  6. }
  // Sample is one observation passed to Store.Record.
  //
  // The aggregation in atomicBucket.add only counts LatencyMs when it is
  // positive, only counts TtftMs when HasTtft is set and the value is
  // non-negative, and only counts OutputTokens/GenerationMs when both are
  // positive.
  type Sample struct {
  	// Model and Group label the sample; presumably they are the
  	// dimensions results are grouped by — confirm against the Store
  	// implementation.
  	Model, Group string

  	// LatencyMs and TtftMs are durations in milliseconds (per the field
  	// names). TtftMs is only meaningful when HasTtft is true.
  	LatencyMs, TtftMs int64

  	// HasTtft reports whether TtftMs carries a value; Success marks the
  	// sample as a successful request.
  	HasTtft, Success bool

  	// OutputTokens and GenerationMs are accumulated as a pair;
  	// presumably they feed a tokens-per-second figure — confirm.
  	OutputTokens, GenerationMs int64
  }
  // QueryParams is the argument bundle for Store.Query.
  type QueryParams struct {
  	// Model and Group select what to query. How an empty string is
  	// treated is not visible in this file — confirm with the Store
  	// implementation.
  	Model, Group string

  	// Hours is presumably the size of the look-back window in hours —
  	// confirm against the implementation.
  	Hours int
  }
  // BucketPoint is one element of a GroupResult series, serialized to JSON
  // with snake_case keys.
  type BucketPoint struct {
  	// Ts identifies the bucket in time; the unit (seconds vs.
  	// milliseconds) is not visible in this file.
  	Ts int64 `json:"ts"`

  	// AvgTtftMs and AvgLatencyMs are averages in milliseconds, per the
  	// field names.
  	AvgTtftMs    int64 `json:"avg_ttft_ms"`
  	AvgLatencyMs int64 `json:"avg_latency_ms"`

  	// SuccessRate is a floating-point rate; whether it is a 0–1 fraction
  	// or a percentage is not visible here.
  	SuccessRate float64 `json:"success_rate"`

  	// AvgTps is presumably average tokens per second — confirm.
  	AvgTps float64 `json:"avg_tps"`
  }
  29. type GroupResult struct {
  30. Group string `json:"group"`
  31. AvgTtftMs int64 `json:"avg_ttft_ms"`
  32. AvgLatencyMs int64 `json:"avg_latency_ms"`
  33. SuccessRate float64 `json:"success_rate"`
  34. AvgTps float64 `json:"avg_tps"`
  35. Series []BucketPoint `json:"series"`
  36. }
  37. type QueryResult struct {
  38. ModelName string `json:"model_name"`
  39. SeriesSchema string `json:"series_schema"`
  40. Groups []GroupResult `json:"groups"`
  41. }
  // ModelSummary is one entry of SummaryAllResult: headline figures for a
  // single model, serialized to JSON with snake_case keys.
  type ModelSummary struct {
  	// ModelName is the model these figures belong to.
  	ModelName string `json:"model_name"`

  	// AvgLatencyMs is an average in milliseconds, per the field name.
  	AvgLatencyMs int64 `json:"avg_latency_ms"`

  	// SuccessRate is a floating-point rate; its scale (fraction vs.
  	// percentage) is not visible in this file.
  	SuccessRate float64 `json:"success_rate"`

  	// AvgTps is presumably average tokens per second — confirm.
  	AvgTps float64 `json:"avg_tps"`

  	// RequestCount is the number of requests behind the figures above.
  	RequestCount int64 `json:"request_count"`
  }
  49. type SummaryAllResult struct {
  50. Models []ModelSummary `json:"models"`
  51. }
  // bucketKey names one bucket by model, group and bucket timestamp. All of
  // its fields are comparable, so the struct can be used as a map key.
  type bucketKey struct {
  	model, group string

  	// bucketTs is the bucket's timestamp; unit and alignment are not
  	// visible in this file.
  	bucketTs int64
  }
  // counters is the plain-value form of a bucket's accumulators, with the same
  // field names as atomicBucket. atomicBucket.snapshot and atomicBucket.drain
  // return it, and atomicBucket.addCounters consumes it.
  type counters struct {
  	// requestCount is every recorded sample; successCount is the subset
  	// flagged as successful.
  	requestCount, successCount int64

  	// totalLatencyMs is the sum of recorded latencies.
  	totalLatencyMs int64

  	// ttftSumMs / ttftCount are the sum and the number of samples that
  	// carried a TTFT value.
  	ttftSumMs, ttftCount int64

  	// outputTokens / generationMs are the summed token counts and
  	// generation times.
  	outputTokens, generationMs int64
  }
  // atomicBucket is a set of seven independent atomic.Int64 accumulators, one
  // per field of counters. Each field is updated atomically on its own; the
  // struct provides no atomicity across fields.
  //
  // atomic.Int64 must not be copied after first use, so an atomicBucket
  // should only be handled through a pointer.
  type atomicBucket struct {
  	// requestCount counts samples; successCount counts successful ones.
  	requestCount, successCount atomic.Int64

  	// totalLatencyMs is the running latency sum.
  	totalLatencyMs atomic.Int64

  	// ttftSumMs / ttftCount track the TTFT sum and how many samples
  	// contributed to it.
  	ttftSumMs, ttftCount atomic.Int64

  	// outputTokens / generationMs are running sums that are always
  	// updated together.
  	outputTokens, generationMs atomic.Int64
  }
  75. func (b *atomicBucket) add(sample Sample) {
  76. b.requestCount.Add(1)
  77. if sample.Success {
  78. b.successCount.Add(1)
  79. }
  80. if sample.LatencyMs > 0 {
  81. b.totalLatencyMs.Add(sample.LatencyMs)
  82. }
  83. if sample.HasTtft && sample.TtftMs >= 0 {
  84. b.ttftSumMs.Add(sample.TtftMs)
  85. b.ttftCount.Add(1)
  86. }
  87. if sample.OutputTokens > 0 && sample.GenerationMs > 0 {
  88. b.outputTokens.Add(sample.OutputTokens)
  89. b.generationMs.Add(sample.GenerationMs)
  90. }
  91. }
  92. func (b *atomicBucket) snapshot() counters {
  93. return counters{
  94. requestCount: b.requestCount.Load(),
  95. successCount: b.successCount.Load(),
  96. totalLatencyMs: b.totalLatencyMs.Load(),
  97. ttftSumMs: b.ttftSumMs.Load(),
  98. ttftCount: b.ttftCount.Load(),
  99. outputTokens: b.outputTokens.Load(),
  100. generationMs: b.generationMs.Load(),
  101. }
  102. }
  103. func (b *atomicBucket) drain() counters {
  104. return counters{
  105. requestCount: b.requestCount.Swap(0),
  106. successCount: b.successCount.Swap(0),
  107. totalLatencyMs: b.totalLatencyMs.Swap(0),
  108. ttftSumMs: b.ttftSumMs.Swap(0),
  109. ttftCount: b.ttftCount.Swap(0),
  110. outputTokens: b.outputTokens.Swap(0),
  111. generationMs: b.generationMs.Swap(0),
  112. }
  113. }
  114. func (b *atomicBucket) addCounters(c counters) {
  115. if c.requestCount != 0 {
  116. b.requestCount.Add(c.requestCount)
  117. }
  118. if c.successCount != 0 {
  119. b.successCount.Add(c.successCount)
  120. }
  121. if c.totalLatencyMs != 0 {
  122. b.totalLatencyMs.Add(c.totalLatencyMs)
  123. }
  124. if c.ttftSumMs != 0 {
  125. b.ttftSumMs.Add(c.ttftSumMs)
  126. }
  127. if c.ttftCount != 0 {
  128. b.ttftCount.Add(c.ttftCount)
  129. }
  130. if c.outputTokens != 0 {
  131. b.outputTokens.Add(c.outputTokens)
  132. }
  133. if c.generationMs != 0 {
  134. b.generationMs.Add(c.generationMs)
  135. }
  136. }