types.go 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. package perfmetrics
  2. import "sync/atomic"
  3. type Store interface {
  4. Record(sample Sample)
  5. Query(params QueryParams) (QueryResult, error)
  6. }
  7. type Sample struct {
  8. Model string
  9. Group string
  10. LatencyMs int64
  11. TtftMs int64
  12. HasTtft bool
  13. Success bool
  14. OutputTokens int64
  15. GenerationMs int64
  16. }
  17. type QueryParams struct {
  18. Model string
  19. Group string
  20. Hours int
  21. }
  22. type BucketPoint struct {
  23. Ts int64 `json:"ts"`
  24. AvgTtftMs int64 `json:"avg_ttft_ms"`
  25. AvgLatencyMs int64 `json:"avg_latency_ms"`
  26. SuccessRate float64 `json:"success_rate"`
  27. AvgTps float64 `json:"avg_tps"`
  28. }
  29. type GroupResult struct {
  30. Group string `json:"group"`
  31. AvgTtftMs int64 `json:"avg_ttft_ms"`
  32. AvgLatencyMs int64 `json:"avg_latency_ms"`
  33. SuccessRate float64 `json:"success_rate"`
  34. AvgTps float64 `json:"avg_tps"`
  35. Series []BucketPoint `json:"series"`
  36. }
  37. type QueryResult struct {
  38. ModelName string `json:"model_name"`
  39. SeriesSchema string `json:"series_schema"`
  40. Groups []GroupResult `json:"groups"`
  41. }
  42. type bucketKey struct {
  43. model string
  44. group string
  45. bucketTs int64
  46. }
  47. type counters struct {
  48. requestCount int64
  49. successCount int64
  50. totalLatencyMs int64
  51. ttftSumMs int64
  52. ttftCount int64
  53. outputTokens int64
  54. generationMs int64
  55. }
  56. type atomicBucket struct {
  57. requestCount atomic.Int64
  58. successCount atomic.Int64
  59. totalLatencyMs atomic.Int64
  60. ttftSumMs atomic.Int64
  61. ttftCount atomic.Int64
  62. outputTokens atomic.Int64
  63. generationMs atomic.Int64
  64. }
  65. func (b *atomicBucket) add(sample Sample) {
  66. b.requestCount.Add(1)
  67. if sample.Success {
  68. b.successCount.Add(1)
  69. }
  70. if sample.LatencyMs > 0 {
  71. b.totalLatencyMs.Add(sample.LatencyMs)
  72. }
  73. if sample.HasTtft && sample.TtftMs >= 0 {
  74. b.ttftSumMs.Add(sample.TtftMs)
  75. b.ttftCount.Add(1)
  76. }
  77. if sample.OutputTokens > 0 && sample.GenerationMs > 0 {
  78. b.outputTokens.Add(sample.OutputTokens)
  79. b.generationMs.Add(sample.GenerationMs)
  80. }
  81. }
  82. func (b *atomicBucket) snapshot() counters {
  83. return counters{
  84. requestCount: b.requestCount.Load(),
  85. successCount: b.successCount.Load(),
  86. totalLatencyMs: b.totalLatencyMs.Load(),
  87. ttftSumMs: b.ttftSumMs.Load(),
  88. ttftCount: b.ttftCount.Load(),
  89. outputTokens: b.outputTokens.Load(),
  90. generationMs: b.generationMs.Load(),
  91. }
  92. }
  93. func (b *atomicBucket) drain() counters {
  94. return counters{
  95. requestCount: b.requestCount.Swap(0),
  96. successCount: b.successCount.Swap(0),
  97. totalLatencyMs: b.totalLatencyMs.Swap(0),
  98. ttftSumMs: b.ttftSumMs.Swap(0),
  99. ttftCount: b.ttftCount.Swap(0),
  100. outputTokens: b.outputTokens.Swap(0),
  101. generationMs: b.generationMs.Swap(0),
  102. }
  103. }
  104. func (b *atomicBucket) addCounters(c counters) {
  105. if c.requestCount != 0 {
  106. b.requestCount.Add(c.requestCount)
  107. }
  108. if c.successCount != 0 {
  109. b.successCount.Add(c.successCount)
  110. }
  111. if c.totalLatencyMs != 0 {
  112. b.totalLatencyMs.Add(c.totalLatencyMs)
  113. }
  114. if c.ttftSumMs != 0 {
  115. b.ttftSumMs.Add(c.ttftSumMs)
  116. }
  117. if c.ttftCount != 0 {
  118. b.ttftCount.Add(c.ttftCount)
  119. }
  120. if c.outputTokens != 0 {
  121. b.outputTokens.Add(c.outputTokens)
  122. }
  123. if c.generationMs != 0 {
  124. b.generationMs.Add(c.generationMs)
  125. }
  126. }