| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- package perfmetrics
- import "sync/atomic"
- type Store interface {
- Record(sample Sample)
- Query(params QueryParams) (QueryResult, error)
- }
// Sample is a single observation handed to Store.Record.
//
// The "Ms" suffixes suggest millisecond units; nothing in this type enforces
// that, so callers are responsible for supplying consistent units.
type Sample struct {
	// Model and Group identify what the observation belongs to.
	Model string
	Group string

	// LatencyMs is the latency of the request.
	LatencyMs int64

	// TtftMs is only meaningful when HasTtft is true.
	TtftMs  int64
	HasTtft bool

	// Success reports whether the request counted as successful.
	Success bool

	// OutputTokens and GenerationMs are recorded as a pair
	// (presumably to derive a tokens-per-second figure; confirm with callers).
	OutputTokens int64
	GenerationMs int64
}
// QueryParams carries the arguments of Store.Query.
type QueryParams struct {
	// Model and Group select what to query.
	Model, Group string

	// Hours is the size of the query window in hours, judging by the name;
	// how it is bounded or defaulted is up to the Store implementation.
	Hours int
}
// BucketPoint is one element of a GroupResult series. Every field is always
// emitted in JSON (no omitempty), under the snake_case key in its tag.
type BucketPoint struct {
	// Ts is the timestamp identifying the bucket (unit not fixed by this type).
	Ts int64 `json:"ts"`

	// Aggregates for the bucket; how they are computed is not defined here.
	AvgTtftMs    int64   `json:"avg_ttft_ms"`
	AvgLatencyMs int64   `json:"avg_latency_ms"`
	SuccessRate  float64 `json:"success_rate"`
	AvgTps       float64 `json:"avg_tps"`
}
- type GroupResult struct {
- Group string `json:"group"`
- AvgTtftMs int64 `json:"avg_ttft_ms"`
- AvgLatencyMs int64 `json:"avg_latency_ms"`
- SuccessRate float64 `json:"success_rate"`
- AvgTps float64 `json:"avg_tps"`
- Series []BucketPoint `json:"series"`
- }
- type QueryResult struct {
- ModelName string `json:"model_name"`
- SeriesSchema string `json:"series_schema"`
- Groups []GroupResult `json:"groups"`
- }
// ModelSummary is a one-row roll-up for a single model. Every field is always
// emitted in JSON (no omitempty), under the snake_case key in its tag.
type ModelSummary struct {
	// ModelName names the model the row describes.
	ModelName string `json:"model_name"`

	// Aggregates for the model; how they are computed is not defined here.
	AvgLatencyMs int64   `json:"avg_latency_ms"`
	SuccessRate  float64 `json:"success_rate"`
	AvgTps       float64 `json:"avg_tps"`

	// RequestCount is the number of requests behind the aggregates.
	RequestCount int64 `json:"request_count"`
}
- type SummaryAllResult struct {
- Models []ModelSummary `json:"models"`
- }
// bucketKey identifies one bucket by model, group and bucket timestamp.
// All fields are comparable, so the struct can be used as a map key.
type bucketKey struct {
	model, group string
	bucketTs     int64
}
// counters is a plain-value set of running totals. It has the same seven
// fields as atomicBucket, but as ordinary int64s that can be copied freely.
type counters struct {
	// Number of samples seen and how many of them were successful.
	requestCount, successCount int64

	// Sum of recorded latencies.
	totalLatencyMs int64

	// Sum of recorded TTFT values and the number of samples that carried one.
	ttftSumMs, ttftCount int64

	// Sums of output tokens and generation time.
	outputTokens, generationMs int64
}
// atomicBucket holds the same totals as counters, with each field stored in
// its own atomic.Int64 so individual fields can be updated and read without a
// lock. The zero value is ready to use. Because it contains atomic values it
// must be used through a pointer and never copied.
type atomicBucket struct {
	// Number of samples seen and how many of them were successful.
	requestCount, successCount atomic.Int64

	// Sum of recorded latencies.
	totalLatencyMs atomic.Int64

	// Sum of recorded TTFT values and the number of samples that carried one.
	ttftSumMs, ttftCount atomic.Int64

	// Sums of output tokens and generation time.
	outputTokens, generationMs atomic.Int64
}
- func (b *atomicBucket) add(sample Sample) {
- b.requestCount.Add(1)
- if sample.Success {
- b.successCount.Add(1)
- }
- if sample.LatencyMs > 0 {
- b.totalLatencyMs.Add(sample.LatencyMs)
- }
- if sample.HasTtft && sample.TtftMs >= 0 {
- b.ttftSumMs.Add(sample.TtftMs)
- b.ttftCount.Add(1)
- }
- if sample.OutputTokens > 0 && sample.GenerationMs > 0 {
- b.outputTokens.Add(sample.OutputTokens)
- b.generationMs.Add(sample.GenerationMs)
- }
- }
- func (b *atomicBucket) snapshot() counters {
- return counters{
- requestCount: b.requestCount.Load(),
- successCount: b.successCount.Load(),
- totalLatencyMs: b.totalLatencyMs.Load(),
- ttftSumMs: b.ttftSumMs.Load(),
- ttftCount: b.ttftCount.Load(),
- outputTokens: b.outputTokens.Load(),
- generationMs: b.generationMs.Load(),
- }
- }
- func (b *atomicBucket) drain() counters {
- return counters{
- requestCount: b.requestCount.Swap(0),
- successCount: b.successCount.Swap(0),
- totalLatencyMs: b.totalLatencyMs.Swap(0),
- ttftSumMs: b.ttftSumMs.Swap(0),
- ttftCount: b.ttftCount.Swap(0),
- outputTokens: b.outputTokens.Swap(0),
- generationMs: b.generationMs.Swap(0),
- }
- }
- func (b *atomicBucket) addCounters(c counters) {
- if c.requestCount != 0 {
- b.requestCount.Add(c.requestCount)
- }
- if c.successCount != 0 {
- b.successCount.Add(c.successCount)
- }
- if c.totalLatencyMs != 0 {
- b.totalLatencyMs.Add(c.totalLatencyMs)
- }
- if c.ttftSumMs != 0 {
- b.ttftSumMs.Add(c.ttftSumMs)
- }
- if c.ttftCount != 0 {
- b.ttftCount.Add(c.ttftCount)
- }
- if c.outputTokens != 0 {
- b.outputTokens.Add(c.outputTokens)
- }
- if c.generationMs != 0 {
- b.generationMs.Add(c.generationMs)
- }
- }
|