|
|
@@ -1,8 +1,8 @@
|
|
|
import { useMemo } from 'react'
|
|
|
+import { useQuery } from '@tanstack/react-query'
|
|
|
import {
|
|
|
Activity,
|
|
|
AlertTriangle,
|
|
|
- Gauge,
|
|
|
HeartPulse,
|
|
|
Timer,
|
|
|
TrendingUp,
|
|
|
@@ -18,22 +18,14 @@ import {
|
|
|
TableRow,
|
|
|
} from '@/components/ui/table'
|
|
|
import { GroupBadge } from '@/components/group-badge'
|
|
|
+import { getPerfMetrics, type PerformanceGroup } from '../api'
|
|
|
import {
|
|
|
- aggregateUptime,
|
|
|
- buildGroupPerformance,
|
|
|
- buildLatencyTimeSeries,
|
|
|
- buildUptimeSeries,
|
|
|
formatLatency,
|
|
|
- formatThroughput,
|
|
|
formatUptimePct,
|
|
|
type UptimeDayPoint,
|
|
|
} from '../lib/mock-stats'
|
|
|
import type { PricingModel } from '../types'
|
|
|
-import {
|
|
|
- LatencyTrendChart,
|
|
|
- ThroughputBarChart,
|
|
|
- UptimeBarChart,
|
|
|
-} from './model-details-charts'
|
|
|
+import { LatencyTrendChart, UptimeBarChart } from './model-details-charts'
|
|
|
import { UptimeSparkline } from './model-details-uptime-sparkline'
|
|
|
|
|
|
const COMPACT_NUMBER = new Intl.NumberFormat(undefined, {
|
|
|
@@ -74,33 +66,102 @@ function StatCard(props: {
|
|
|
)
|
|
|
}
|
|
|
|
|
|
+type PerformanceRow = {
|
|
|
+ group: string
|
|
|
+ avg_ttft_ms: number
|
|
|
+ avg_latency_ms: number
|
|
|
+ success_rate: number
|
|
|
+ request_count: number
|
|
|
+}
|
|
|
+
|
|
|
+function toLatencySeries(groups: PerformanceGroup[]) {
|
|
|
+ return groups.flatMap((group) =>
|
|
|
+ group.series
|
|
|
+ .filter((point) => point.ttft_count > 0 && point.avg_ttft_ms > 0)
|
|
|
+ .map((point) => ({
|
|
|
+ timestamp: new Date(point.ts * 1000).toISOString(),
|
|
|
+ group: group.group,
|
|
|
+ ttft_ms: point.avg_ttft_ms,
|
|
|
+ }))
|
|
|
+ )
|
|
|
+}
|
|
|
+
|
|
|
+function toUptimeSeries(groups: PerformanceGroup[]): UptimeDayPoint[] {
|
|
|
+ const byTs = new Map<number, { count: number; success: number }>()
|
|
|
+ for (const group of groups) {
|
|
|
+ for (const point of group.series) {
|
|
|
+ const current = byTs.get(point.ts) ?? { count: 0, success: 0 }
|
|
|
+ current.count += point.count
|
|
|
+ current.success += point.success_count
|
|
|
+ byTs.set(point.ts, current)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return Array.from(byTs.entries())
|
|
|
+ .sort(([a], [b]) => a - b)
|
|
|
+ .map(([ts, value]) => {
|
|
|
+ const uptime = value.count > 0 ? (value.success / value.count) * 100 : 0
|
|
|
+ return {
|
|
|
+ date: new Date(ts * 1000).toISOString(),
|
|
|
+ uptime_pct: Math.round(uptime * 100) / 100,
|
|
|
+ incidents: value.success < value.count ? 1 : 0,
|
|
|
+ outage_minutes: 0,
|
|
|
+ }
|
|
|
+ })
|
|
|
+}
|
|
|
+
|
|
|
+function toGroupUptimeSeries(group: PerformanceGroup): UptimeDayPoint[] {
|
|
|
+ return group.series.map((point) => ({
|
|
|
+ date: new Date(point.ts * 1000).toISOString(),
|
|
|
+ uptime_pct: Math.round(point.success_rate * 100) / 100,
|
|
|
+ incidents: point.success_count < point.count ? 1 : 0,
|
|
|
+ outage_minutes: 0,
|
|
|
+ }))
|
|
|
+}
|
|
|
+
|
|
|
+function weightedAverage(
|
|
|
+ rows: PerformanceRow[],
|
|
|
+ field: 'avg_ttft_ms' | 'avg_latency_ms'
|
|
|
+): number {
|
|
|
+ let total = 0
|
|
|
+ let count = 0
|
|
|
+ for (const row of rows) {
|
|
|
+ if (row[field] <= 0 || row.request_count <= 0) continue
|
|
|
+ total += row[field] * row.request_count
|
|
|
+ count += row.request_count
|
|
|
+ }
|
|
|
+ return count > 0 ? Math.round(total / count) : 0
|
|
|
+}
|
|
|
+
|
|
|
export function ModelDetailsPerformance(props: { model: PricingModel }) {
|
|
|
const { t } = useTranslation()
|
|
|
- const performances = useMemo(
|
|
|
- () => buildGroupPerformance(props.model),
|
|
|
- [props.model]
|
|
|
- )
|
|
|
- const latencySeries = useMemo(
|
|
|
- () => buildLatencyTimeSeries(props.model),
|
|
|
- [props.model]
|
|
|
- )
|
|
|
- const uptimeSeries = useMemo(
|
|
|
- () => buildUptimeSeries(props.model),
|
|
|
- [props.model]
|
|
|
- )
|
|
|
- const aggregated = useMemo(
|
|
|
- () => aggregateUptime(uptimeSeries),
|
|
|
- [uptimeSeries]
|
|
|
+ const metricsQuery = useQuery({
|
|
|
+ queryKey: ['perf-metrics', props.model.model_name],
|
|
|
+ queryFn: () => getPerfMetrics(props.model.model_name, 24),
|
|
|
+ staleTime: 60 * 1000,
|
|
|
+ })
|
|
|
+ const groups = metricsQuery.data?.data.groups ?? []
|
|
|
+ const performances = useMemo<PerformanceRow[]>(
|
|
|
+ () =>
|
|
|
+ groups.map((group) => ({
|
|
|
+ group: group.group,
|
|
|
+ avg_ttft_ms: group.avg_ttft_ms,
|
|
|
+ avg_latency_ms: group.avg_latency_ms,
|
|
|
+ success_rate: group.success_rate,
|
|
|
+ request_count: group.request_count,
|
|
|
+ })),
|
|
|
+ [groups]
|
|
|
)
|
|
|
+ const latencySeries = useMemo(() => toLatencySeries(groups), [groups])
|
|
|
+ const uptimeSeries = useMemo(() => toUptimeSeries(groups), [groups])
|
|
|
const uptimeByGroup = useMemo<Record<string, UptimeDayPoint[]>>(() => {
|
|
|
const map: Record<string, UptimeDayPoint[]> = {}
|
|
|
- for (const perf of performances) {
|
|
|
- map[perf.group] = buildUptimeSeries(props.model, perf.group)
|
|
|
+ for (const group of groups) {
|
|
|
+ map[group.group] = toGroupUptimeSeries(group)
|
|
|
}
|
|
|
return map
|
|
|
- }, [performances, props.model])
|
|
|
+ }, [groups])
|
|
|
|
|
|
- if (performances.length === 0) {
|
|
|
+ if (metricsQuery.isLoading || performances.length === 0) {
|
|
|
return (
|
|
|
<div className='text-muted-foreground rounded-lg border p-6 text-center text-sm'>
|
|
|
{t('Performance data is not yet available for this model.')}
|
|
|
@@ -108,18 +169,22 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
|
|
|
)
|
|
|
}
|
|
|
|
|
|
- const bestTtft = Math.min(...performances.map((p) => p.ttft_p50_ms))
|
|
|
- const bestThroughput = Math.max(...performances.map((p) => p.throughput_tps))
|
|
|
- const totalRequests = performances.reduce(
|
|
|
- (s, p) => s + p.request_volume_24h,
|
|
|
- 0
|
|
|
- )
|
|
|
- const intent =
|
|
|
- aggregated.uptime_pct >= 99.9
|
|
|
- ? 'success'
|
|
|
- : aggregated.uptime_pct >= 99
|
|
|
- ? 'default'
|
|
|
- : 'warning'
|
|
|
+ const ttftValues = performances
|
|
|
+ .map((p) => p.avg_ttft_ms)
|
|
|
+ .filter((value) => value > 0)
|
|
|
+ const bestTtft = ttftValues.length > 0 ? Math.min(...ttftValues) : 0
|
|
|
+ const avgLatency = weightedAverage(performances, 'avg_latency_ms')
|
|
|
+ const totalRequests = performances.reduce((s, p) => s + p.request_count, 0)
|
|
|
+ const totalSuccess = groups.reduce((s, p) => s + p.success_count, 0)
|
|
|
+ const successRate =
|
|
|
+ totalRequests > 0 ? (totalSuccess / totalRequests) * 100 : 0
|
|
|
+ const incidentCount = uptimeSeries.reduce((s, p) => s + p.incidents, 0)
|
|
|
+ let intent: 'default' | 'warning' | 'success' = 'warning'
|
|
|
+ if (successRate >= 99.9) {
|
|
|
+ intent = 'success'
|
|
|
+ } else if (successRate >= 99) {
|
|
|
+ intent = 'default'
|
|
|
+ }
|
|
|
|
|
|
const headerCellClass =
|
|
|
'text-muted-foreground py-2 text-[10px] font-medium tracking-wider uppercase'
|
|
|
@@ -134,21 +199,21 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
|
|
|
hint={t('Lowest median first-token latency')}
|
|
|
/>
|
|
|
<StatCard
|
|
|
- icon={Gauge}
|
|
|
- label={t('Peak throughput')}
|
|
|
- value={formatThroughput(bestThroughput)}
|
|
|
+ icon={Timer}
|
|
|
+ label={t('Average latency')}
|
|
|
+ value={formatLatency(avgLatency)}
|
|
|
hint={t('Across all groups')}
|
|
|
/>
|
|
|
<StatCard
|
|
|
icon={HeartPulse}
|
|
|
- label={t('Uptime (30d)')}
|
|
|
- value={formatUptimePct(aggregated.uptime_pct)}
|
|
|
+ label={t('Success rate')}
|
|
|
+ value={formatUptimePct(successRate)}
|
|
|
hint={
|
|
|
- aggregated.incidents > 0
|
|
|
- ? t('{{count}} incidents in the last 30 days', {
|
|
|
- count: aggregated.incidents,
|
|
|
+ incidentCount > 0
|
|
|
+ ? t('{{count}} incidents in the last 24 hours', {
|
|
|
+ count: incidentCount,
|
|
|
})
|
|
|
- : t('No incidents in the last 30 days')
|
|
|
+ : t('No incidents in the last 24 hours')
|
|
|
}
|
|
|
intent={intent}
|
|
|
/>
|
|
|
@@ -164,9 +229,7 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
|
|
|
<SectionHeader
|
|
|
icon={Activity}
|
|
|
title={t('Per-group performance')}
|
|
|
- description={t(
|
|
|
- 'TTFT percentiles, throughput, and 30-day uptime by group'
|
|
|
- )}
|
|
|
+ description={t('Average latency, TTFT, and success rate by group')}
|
|
|
/>
|
|
|
<div className='overflow-x-auto rounded-lg border'>
|
|
|
<Table className='text-sm'>
|
|
|
@@ -174,31 +237,24 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
|
|
|
<TableRow className='hover:bg-transparent'>
|
|
|
<TableHead className={headerCellClass}>{t('Group')}</TableHead>
|
|
|
<TableHead className={`${headerCellClass} text-right`}>
|
|
|
- {t('TTFT P50')}
|
|
|
- </TableHead>
|
|
|
- <TableHead className={`${headerCellClass} text-right`}>
|
|
|
- {t('TTFT P95')}
|
|
|
- </TableHead>
|
|
|
- <TableHead className={`${headerCellClass} text-right`}>
|
|
|
- {t('TTFT P99')}
|
|
|
+ {t('Average TTFT')}
|
|
|
</TableHead>
|
|
|
<TableHead className={`${headerCellClass} text-right`}>
|
|
|
- {t('Throughput')}
|
|
|
+ {t('Average latency')}
|
|
|
</TableHead>
|
|
|
<TableHead
|
|
|
className={`${headerCellClass} min-w-[160px] text-left`}
|
|
|
>
|
|
|
- {t('Uptime (30d)')}
|
|
|
+ {t('Success rate')}
|
|
|
</TableHead>
|
|
|
<TableHead className={`${headerCellClass} text-right`}>
|
|
|
- {t('Requests / 24h')}
|
|
|
+ {t('Request Count')}
|
|
|
</TableHead>
|
|
|
</TableRow>
|
|
|
</TableHeader>
|
|
|
<TableBody>
|
|
|
{performances.map((perf) => {
|
|
|
- const isBestTtft = perf.ttft_p50_ms === bestTtft
|
|
|
- const isBestTput = perf.throughput_tps === bestThroughput
|
|
|
+ const isBestTtft = perf.avg_ttft_ms === bestTtft
|
|
|
return (
|
|
|
<TableRow key={perf.group}>
|
|
|
<TableCell className='py-2.5'>
|
|
|
@@ -210,23 +266,10 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
|
|
|
isBestTtft && 'text-emerald-600 dark:text-emerald-400'
|
|
|
)}
|
|
|
>
|
|
|
- {formatLatency(perf.ttft_p50_ms)}
|
|
|
- </TableCell>
|
|
|
- <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
|
|
|
- {formatLatency(perf.ttft_p95_ms)}
|
|
|
+ {formatLatency(perf.avg_ttft_ms)}
|
|
|
</TableCell>
|
|
|
<TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
|
|
|
- {formatLatency(perf.ttft_p99_ms)}
|
|
|
- </TableCell>
|
|
|
- <TableCell
|
|
|
- className={cn(
|
|
|
- 'py-2.5 text-right font-mono',
|
|
|
- isBestTput &&
|
|
|
- perf.throughput_tps > 0 &&
|
|
|
- 'text-emerald-600 dark:text-emerald-400'
|
|
|
- )}
|
|
|
- >
|
|
|
- {formatThroughput(perf.throughput_tps)}
|
|
|
+ {formatLatency(perf.avg_latency_ms)}
|
|
|
</TableCell>
|
|
|
<TableCell className='py-2.5'>
|
|
|
<UptimeSparkline
|
|
|
@@ -235,7 +278,7 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
|
|
|
/>
|
|
|
</TableCell>
|
|
|
<TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
|
|
|
- {COMPACT_NUMBER.format(perf.request_volume_24h)}
|
|
|
+ {COMPACT_NUMBER.format(perf.request_count)}
|
|
|
</TableCell>
|
|
|
</TableRow>
|
|
|
)
|
|
|
@@ -249,45 +292,31 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
|
|
|
<SectionHeader
|
|
|
icon={Timer}
|
|
|
title={t('Latency trend (last 24h)')}
|
|
|
- description={t(
|
|
|
- 'Median time-to-first-token (TTFT) sampled hourly per group'
|
|
|
- )}
|
|
|
+ description={t('Average time-to-first-token (TTFT) by group')}
|
|
|
/>
|
|
|
<LatencyTrendChart series={latencySeries} />
|
|
|
</section>
|
|
|
|
|
|
- {bestThroughput > 0 && (
|
|
|
- <section>
|
|
|
- <SectionHeader
|
|
|
- icon={Gauge}
|
|
|
- title={t('Throughput by group')}
|
|
|
- description={t('Average tokens per second sustained per group')}
|
|
|
- />
|
|
|
- <ThroughputBarChart rows={performances} />
|
|
|
- </section>
|
|
|
- )}
|
|
|
-
|
|
|
<section>
|
|
|
<SectionHeader
|
|
|
icon={HeartPulse}
|
|
|
- title={t('Uptime (last 30 days)')}
|
|
|
+ title={t('Availability (last 24h)')}
|
|
|
description={
|
|
|
- aggregated.incidents > 0
|
|
|
+ incidentCount > 0
|
|
|
? t(
|
|
|
- 'Daily uptime; {{incidents}} incidents totalling {{minutes}} minutes',
|
|
|
+ 'Request success rate; {{incidents}} incident buckets in the last 24 hours',
|
|
|
{
|
|
|
- incidents: aggregated.incidents,
|
|
|
- minutes: aggregated.outage_minutes,
|
|
|
+ incidents: incidentCount,
|
|
|
}
|
|
|
)
|
|
|
- : t('Daily uptime over the last 30 days')
|
|
|
+ : t('Request success rate sampled over the last 24 hours')
|
|
|
}
|
|
|
accent={
|
|
|
- aggregated.incidents > 0 ? (
|
|
|
+ incidentCount > 0 ? (
|
|
|
<span className='inline-flex items-center gap-1 text-amber-600 dark:text-amber-400'>
|
|
|
<AlertTriangle className='size-3.5' />
|
|
|
{t('{{count}} incidents', {
|
|
|
- count: aggregated.incidents,
|
|
|
+ count: incidentCount,
|
|
|
})}
|
|
|
</span>
|
|
|
) : null
|
|
|
@@ -295,12 +324,6 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
|
|
|
/>
|
|
|
<UptimeBarChart series={uptimeSeries} />
|
|
|
</section>
|
|
|
-
|
|
|
- <p className='text-muted-foreground/60 text-[11px] leading-relaxed'>
|
|
|
- {t(
|
|
|
- 'Performance metrics shown here are simulated for preview purposes and will be replaced with live observability data once the backend integration is complete.'
|
|
|
- )}
|
|
|
- </p>
|
|
|
</div>
|
|
|
)
|
|
|
}
|