model-details-performance.tsx 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. import { useMemo } from 'react'
  2. import { useQuery } from '@tanstack/react-query'
  3. import { AlertTriangle, HeartPulse, Timer } from 'lucide-react'
  4. import { useTranslation } from 'react-i18next'
  5. import { cn } from '@/lib/utils'
  6. import {
  7. Table,
  8. TableBody,
  9. TableCell,
  10. TableHead,
  11. TableHeader,
  12. TableRow,
  13. } from '@/components/ui/table'
  14. import { GroupBadge } from '@/components/group-badge'
  15. import { getPerfMetrics, type PerformanceGroup } from '../api'
  16. import {
  17. formatLatency,
  18. formatThroughput,
  19. formatUptimePct,
  20. type UptimeDayPoint,
  21. } from '../lib/mock-stats'
  22. import type { PricingModel } from '../types'
  23. import { LatencyTrendChart, UptimeTrendChart } from './model-details-charts'
  24. import { UptimeSparkline } from './model-details-uptime-sparkline'
  25. function StatCard(props: {
  26. icon: React.ComponentType<{ className?: string }>
  27. label: string
  28. value: React.ReactNode
  29. hint?: string
  30. intent?: 'default' | 'warning' | 'success'
  31. }) {
  32. const Icon = props.icon
  33. const intent = props.intent ?? 'default'
  34. return (
  35. <div className='bg-background flex flex-col gap-1 rounded-lg border p-3'>
  36. <span className='text-muted-foreground inline-flex items-center gap-1.5 text-[10px] font-medium tracking-wider uppercase'>
  37. <Icon className='size-3' />
  38. {props.label}
  39. </span>
  40. <span
  41. className={cn(
  42. 'text-foreground font-mono text-lg font-semibold tabular-nums',
  43. intent === 'warning' && 'text-amber-600 dark:text-amber-400',
  44. intent === 'success' && 'text-emerald-600 dark:text-emerald-400'
  45. )}
  46. >
  47. {props.value}
  48. </span>
  49. {props.hint && (
  50. <span className='text-muted-foreground/70 text-[11px]'>
  51. {props.hint}
  52. </span>
  53. )}
  54. </div>
  55. )
  56. }
  57. type PerformanceRow = {
  58. group: string
  59. avg_ttft_ms: number
  60. avg_latency_ms: number
  61. success_rate: number
  62. avg_tps: number
  63. }
  64. function toLatencySeries(groups: PerformanceGroup[]) {
  65. const byTs = new Map<number, number[]>()
  66. for (const group of groups) {
  67. for (const point of group.series) {
  68. if (point.avg_ttft_ms <= 0) continue
  69. const current = byTs.get(point.ts) ?? []
  70. current.push(point.avg_ttft_ms)
  71. byTs.set(point.ts, current)
  72. }
  73. }
  74. return Array.from(byTs.entries())
  75. .sort(([a], [b]) => a - b)
  76. .map(([ts, values]) => ({
  77. timestamp: new Date(ts * 1000).toISOString(),
  78. group: 'latency',
  79. ttft_ms: Math.round(
  80. values.reduce((sum, value) => sum + value, 0) / values.length
  81. ),
  82. }))
  83. }
  84. function toUptimeSeries(groups: PerformanceGroup[]): UptimeDayPoint[] {
  85. const byTs = new Map<number, { rates: number[]; incidents: number }>()
  86. for (const group of groups) {
  87. for (const point of group.series) {
  88. const current = byTs.get(point.ts) ?? { rates: [], incidents: 0 }
  89. if (Number.isFinite(point.success_rate)) {
  90. current.rates.push(point.success_rate)
  91. if (point.success_rate < 100) current.incidents += 1
  92. }
  93. byTs.set(point.ts, current)
  94. }
  95. }
  96. return Array.from(byTs.entries())
  97. .sort(([a], [b]) => a - b)
  98. .map(([ts, value]) => {
  99. const uptime =
  100. value.rates.length > 0
  101. ? value.rates.reduce((sum, rate) => sum + rate, 0) /
  102. value.rates.length
  103. : 0
  104. return {
  105. date: new Date(ts * 1000).toISOString(),
  106. uptime_pct: Math.round(uptime * 100) / 100,
  107. incidents: value.incidents,
  108. outage_minutes: 0,
  109. }
  110. })
  111. }
  112. function toGroupUptimeSeries(group: PerformanceGroup): UptimeDayPoint[] {
  113. return group.series.map((point) => ({
  114. date: new Date(point.ts * 1000).toISOString(),
  115. uptime_pct: Math.round(point.success_rate * 100) / 100,
  116. incidents: point.success_rate < 100 ? 1 : 0,
  117. outage_minutes: 0,
  118. }))
  119. }
  120. function average(
  121. rows: PerformanceRow[],
  122. field: 'avg_ttft_ms' | 'avg_latency_ms'
  123. ) {
  124. const values = rows.map((row) => row[field]).filter((value) => value > 0)
  125. if (values.length === 0) return 0
  126. return Math.round(
  127. values.reduce((sum, value) => sum + value, 0) / values.length
  128. )
  129. }
  130. export function ModelDetailsPerformance(props: { model: PricingModel }) {
  131. const { t } = useTranslation()
  132. const metricsQuery = useQuery({
  133. queryKey: ['perf-metrics', props.model.model_name],
  134. queryFn: () => getPerfMetrics(props.model.model_name, 24),
  135. staleTime: 60 * 1000,
  136. })
  137. const groups = metricsQuery.data?.data.groups ?? []
  138. const performances = useMemo<PerformanceRow[]>(
  139. () =>
  140. groups.map((group) => ({
  141. group: group.group,
  142. avg_ttft_ms: group.avg_ttft_ms,
  143. avg_latency_ms: group.avg_latency_ms,
  144. success_rate: group.success_rate,
  145. avg_tps: group.avg_tps,
  146. })),
  147. [groups]
  148. )
  149. const latencySeries = useMemo(() => toLatencySeries(groups), [groups])
  150. const uptimeSeries = useMemo(() => toUptimeSeries(groups), [groups])
  151. const uptimeByGroup = useMemo<Record<string, UptimeDayPoint[]>>(() => {
  152. const map: Record<string, UptimeDayPoint[]> = {}
  153. for (const group of groups) {
  154. map[group.group] = toGroupUptimeSeries(group)
  155. }
  156. return map
  157. }, [groups])
  158. if (metricsQuery.isLoading || performances.length === 0) {
  159. return (
  160. <div className='text-muted-foreground rounded-lg border p-6 text-center text-sm'>
  161. {t('Performance data is not yet available for this model.')}
  162. </div>
  163. )
  164. }
  165. const tpsValues = performances
  166. .map((p) => p.avg_tps)
  167. .filter((value) => value > 0)
  168. const avgTps =
  169. tpsValues.length > 0
  170. ? tpsValues.reduce((sum, value) => sum + value, 0) / tpsValues.length
  171. : 0
  172. const avgLatency = average(performances, 'avg_latency_ms')
  173. const successRates = performances
  174. .map((perf) => perf.success_rate)
  175. .filter((value) => Number.isFinite(value))
  176. const successRate =
  177. successRates.length > 0
  178. ? successRates.reduce((sum, value) => sum + value, 0) /
  179. successRates.length
  180. : 0
  181. const incidentCount = uptimeSeries.reduce((s, p) => s + p.incidents, 0)
  182. let intent: 'default' | 'warning' | 'success' = 'warning'
  183. if (successRate >= 99.9) {
  184. intent = 'success'
  185. } else if (successRate >= 99) {
  186. intent = 'default'
  187. }
  188. const headerCellClass =
  189. 'text-muted-foreground py-2 text-[10px] font-medium tracking-wider uppercase'
  190. return (
  191. <div className='flex flex-col gap-4'>
  192. <div className='grid grid-cols-1 gap-2 sm:grid-cols-3'>
  193. <StatCard
  194. icon={Timer}
  195. label='TPS'
  196. value={formatThroughput(avgTps)}
  197. hint={t('Sustained tokens per second')}
  198. />
  199. <StatCard
  200. icon={Timer}
  201. label={t('Average latency')}
  202. value={formatLatency(avgLatency)}
  203. />
  204. <StatCard
  205. icon={HeartPulse}
  206. label={t('Success rate')}
  207. value={formatUptimePct(successRate)}
  208. hint={
  209. incidentCount > 0
  210. ? t('{{count}} incidents in the last 24 hours', {
  211. count: incidentCount,
  212. })
  213. : t('No incidents in the last 24 hours')
  214. }
  215. intent={intent}
  216. />
  217. </div>
  218. <section>
  219. <SectionHeader
  220. icon={HeartPulse}
  221. title={t('Per-group performance')}
  222. description={t('Average latency, TTFT, TPS, and success rate')}
  223. />
  224. <div className='overflow-x-auto rounded-lg border'>
  225. <Table className='text-sm'>
  226. <TableHeader>
  227. <TableRow className='hover:bg-transparent'>
  228. <TableHead className={headerCellClass}>{t('Group')}</TableHead>
  229. <TableHead className={`${headerCellClass} text-right`}>
  230. TPS
  231. </TableHead>
  232. <TableHead className={`${headerCellClass} text-right`}>
  233. {t('Average TTFT')}
  234. </TableHead>
  235. <TableHead className={`${headerCellClass} text-right`}>
  236. {t('Average latency')}
  237. </TableHead>
  238. <TableHead
  239. className={`${headerCellClass} min-w-[180px] text-left`}
  240. >
  241. {t('Success rate')}
  242. </TableHead>
  243. </TableRow>
  244. </TableHeader>
  245. <TableBody>
  246. {performances.map((perf) => (
  247. <TableRow key={perf.group}>
  248. <TableCell className='py-2.5'>
  249. <GroupBadge group={perf.group} size='sm' />
  250. </TableCell>
  251. <TableCell className='py-2.5 text-right font-mono'>
  252. {formatThroughput(perf.avg_tps)}
  253. </TableCell>
  254. <TableCell className='py-2.5 text-right font-mono'>
  255. {formatLatency(perf.avg_ttft_ms)}
  256. </TableCell>
  257. <TableCell className='text-muted-foreground py-2.5 text-right font-mono'>
  258. {formatLatency(perf.avg_latency_ms)}
  259. </TableCell>
  260. <TableCell className='py-2.5'>
  261. <UptimeSparkline
  262. size='sm'
  263. series={uptimeByGroup[perf.group] ?? []}
  264. />
  265. </TableCell>
  266. </TableRow>
  267. ))}
  268. </TableBody>
  269. </Table>
  270. </div>
  271. </section>
  272. <section>
  273. <SectionHeader
  274. icon={Timer}
  275. title={t('Latency trend (last 24h)')}
  276. description={t('Average TTFT')}
  277. />
  278. <LatencyTrendChart series={latencySeries} />
  279. </section>
  280. <section>
  281. <SectionHeader
  282. icon={HeartPulse}
  283. title={t('Availability (last 24h)')}
  284. description={
  285. incidentCount > 0
  286. ? t(
  287. 'Request success rate; {{incidents}} incident buckets in the last 24 hours',
  288. {
  289. incidents: incidentCount,
  290. }
  291. )
  292. : t('Request success rate sampled over the last 24 hours')
  293. }
  294. accent={
  295. incidentCount > 0 ? (
  296. <span className='inline-flex items-center gap-1 text-amber-600 dark:text-amber-400'>
  297. <AlertTriangle className='size-3.5' />
  298. {t('{{count}} incidents', {
  299. count: incidentCount,
  300. })}
  301. </span>
  302. ) : null
  303. }
  304. />
  305. <UptimeTrendChart series={uptimeSeries} />
  306. </section>
  307. </div>
  308. )
  309. }
  310. function SectionHeader(props: {
  311. icon: React.ComponentType<{ className?: string }>
  312. title: string
  313. description?: string
  314. accent?: React.ReactNode
  315. }) {
  316. const Icon = props.icon
  317. return (
  318. <div className='mb-2 flex flex-wrap items-center justify-between gap-2'>
  319. <div className='flex min-w-0 items-center gap-2'>
  320. <Icon className='text-muted-foreground/70 size-3.5 shrink-0' />
  321. <div className='min-w-0'>
  322. <div className='text-foreground text-sm font-semibold'>
  323. {props.title}
  324. </div>
  325. {props.description && (
  326. <p className='text-muted-foreground/80 text-xs'>
  327. {props.description}
  328. </p>
  329. )}
  330. </div>
  331. </div>
  332. {props.accent && (
  333. <div className='shrink-0 text-xs font-medium'>{props.accent}</div>
  334. )}
  335. </div>
  336. )
  337. }