scoring.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. import { useEffect, useState } from 'react'
  2. import type { RecallSignals, VideoMatchEnrichedVO } from '../api/types'
  3. /**
  4. * 精排参数——前后端同构的单一来源。
  5. *
  6. * 公式 (VIDEO):
  7. * sim_norm = clip((sim - lower) / (1 - lower), 0, 1)
  8. * rov_norm = clip((rov - rovClipLow) / (rovClipHigh - rovClipLow), 0, 1)
  9. * composite = alpha × sim_norm + (1 - alpha) × rov_norm
  10. * ROV 缺失时: composite = alpha × sim_norm(质量分为 0)
  11. *
  12. * 公式 (ARTICLE):
  13. * sim_norm = clip((sim - lower) / (1 - lower), 0, 1)
  14. * ROV 缺失时退化为纯 sim 排序: composite = sim_norm
  15. *
  16. * 公式 (MATERIAL):
  17. * sim_norm = clip((sim - simThreshold) / (1 - simThreshold), 0, 1)
  18. * qualityScore = (wCtr × ctr + wViral × viral + wRoi × roi) / qualTotalW
  19. * composite = alpha × sim_norm + (1 - alpha) × qualityScore
  20. * 质量缺失时按 materialMissingStrategy 处理:"group"(分组)或 "shrink"(收缩)
  21. */
  22. export interface RankingParams {
  23. simThreshold: number
  24. simThresholdsByCode: Record<string, number>
  25. /** ROV 归一化下界(clip 低值) */
  26. rovClipLow: number
  27. /** ROV 归一化上界(clip 高值) */
  28. rovClipHigh: number
  29. /** 相关性 VS 质量的权衡权重 [0, 1],VIDEO/ARTICLE/MATERIAL 通用 */
  30. alpha: number
  31. /** 解构维度加权(兜底,未在 boostsByCode 中配置的维度使用此值) */
  32. deconstructBoost: number
  33. /** 按维度独立 boost —— 每个 configCode 可单独设置,覆盖 deconstructBoost */
  34. boostsByCode: Record<string, number>
  35. /** 素材质量子维度权重——打开率,默认 0.5(与 wViral/wRoi 之和为 1) */
  36. wCtr: number
  37. /** 素材质量子维度权重——裂变率,默认 0.3 */
  38. wViral: number
  39. /** 素材质量子维度权重——ROI,默认 0.2 */
  40. wRoi: number
  41. /** 素材质量缺失策略:"group" | "shrink" */
  42. materialMissingStrategy: 'group' | 'shrink'
  43. }
  44. /** 维度 boost 取值范围 */
  45. export const BOOST_MIN = 0.1
  46. export const BOOST_MAX = 1
  47. const TOPIC_CONFIG_CODE = 'VIDEO_TOPIC'
  48. /** 各维度默认 boost:选题 1,其余 0.4 */
  49. export function getDefaultBoostForCode(code: string): number {
  50. return code === TOPIC_CONFIG_CODE ? 1 : 0.4
  51. }
  52. export const DEFAULT_RANKING_PARAMS: RankingParams = {
  53. simThreshold: 0.65,
  54. simThresholdsByCode: {},
  55. boostsByCode: {},
  56. rovClipLow: 0,
  57. rovClipHigh: 0.07,
  58. alpha: 0.6,
  59. deconstructBoost: 0.4,
  60. wCtr: 0.5,
  61. wViral: 0.3,
  62. wRoi: 0.2,
  63. materialMissingStrategy: 'group',
  64. }
  65. export interface ScoreBreakdown {
  66. composite: number
  67. simNorm: number
  68. rovNorm: number
  69. boost: number
  70. lowerBound: number
  71. passesThreshold: boolean
  72. /** 精排加权质量分:素材=(wCtr·ctr+wViral·viral+wRoi·roi)/Σw;视频=rov_norm */
  73. weightedQuality?: number
  74. /** 素材质量缺失时置 true,调用方按策略单独成组 */
  75. qualityMissing?: boolean
  76. }
  77. const clip01 = (x: number) => Math.max(0, Math.min(1, x))
  78. /** signals.quality 缺失时,从 materialDetail.quality 构造质量信号 */
  79. function materialQualityFromDetail(
  80. item: VideoMatchEnrichedVO,
  81. ): RecallSignals['quality'] | undefined {
  82. const q = item.materialDetail?.quality
  83. if (!q) return undefined
  84. const ctr = q.conversionEfficiencyScore
  85. const viral = q.viralScore
  86. const roi = q.revenueScore
  87. const hasData = [ctr, viral, roi].some((v) => v != null && Number.isFinite(v))
  88. if (!hasData) return undefined
  89. return {
  90. hasData: true,
  91. ctr: ctr ?? null,
  92. viral: viral ?? null,
  93. roi: roi ?? null,
  94. }
  95. }
  96. export function effectiveSimThreshold(
  97. configCode: string | null | undefined,
  98. params: RankingParams,
  99. ): number {
  100. if (configCode && configCode in params.simThresholdsByCode) {
  101. return params.simThresholdsByCode[configCode]
  102. }
  103. return params.simThreshold
  104. }
  105. /**
  106. * 计算单条召回结果的综合得分——WP2 前后端同构版本。
  107. *
  108. * 关键修正:
  109. * - 读 signals 而非散落字段(sim/rov/quality)
  110. * - deconstructBoost 按 modality===VIDEO 判定,不按 configCode.startsWith("VIDEO_")
  111. * - ARTICLE 无 rov 时退化为纯 sim 排序
  112. * - MATERIAL 质量缺失按 signals.quality.hasData 统一判定,不再回退 0.5
  113. */
  114. export function computeCompositeScore(
  115. item: VideoMatchEnrichedVO,
  116. params: RankingParams,
  117. ): ScoreBreakdown | null {
  118. // WP2: 读 signals.sim,兼容旧 score 字段
  119. const sim = item.signals?.sim ?? item.score
  120. if (sim == null || !Number.isFinite(sim)) return null
  121. const lowerBound = effectiveSimThreshold(item.configCode, params)
  122. const denom = 1 - lowerBound
  123. const simNorm = denom > 0 ? clip01((sim - lowerBound) / denom) : 0
  124. const passesThreshold = sim >= lowerBound
  125. // 素材模态:多维质量加权
  126. if (item.modality === 'MATERIAL') {
  127. const quality = item.signals?.quality ?? materialQualityFromDetail(item)
  128. return rankMaterial(simNorm, lowerBound, passesThreshold, quality, params)
  129. }
  130. // VIDEO / ARTICLE 模态:ROV 公式
  131. return rankVideoArticle(simNorm, lowerBound, passesThreshold, item, params)
  132. }
  133. function rankMaterial(
  134. simNorm: number,
  135. lowerBound: number,
  136. passesThreshold: boolean,
  137. quality: RecallSignals['quality'] | undefined,
  138. params: RankingParams,
  139. ): ScoreBreakdown {
  140. const alpha = params.alpha
  141. if (quality == null || !quality.hasData) {
  142. // group(默认):无质量数据,仅依赖相关性
  143. if (params.materialMissingStrategy === 'group') {
  144. return {
  145. composite: alpha * simNorm,
  146. simNorm,
  147. rovNorm: 0,
  148. boost: 1,
  149. lowerBound,
  150. passesThreshold,
  151. qualityMissing: true,
  152. }
  153. }
  154. // shrink: 无先验均值时退化为 alpha × simNorm
  155. return {
  156. composite: alpha * simNorm,
  157. simNorm,
  158. rovNorm: 0,
  159. boost: 1,
  160. lowerBound,
  161. passesThreshold,
  162. }
  163. }
  164. const ctr = quality.ctr ?? 0
  165. const viral = quality.viral ?? 0
  166. const roi = quality.roi ?? 0
  167. const qualTotalW = params.wCtr + params.wViral + params.wRoi || 1
  168. const weightedQuality = (params.wCtr * ctr + params.wViral * viral + params.wRoi * roi) / qualTotalW
  169. const composite = alpha * simNorm + (1 - alpha) * weightedQuality
  170. return {
  171. composite,
  172. simNorm,
  173. rovNorm: 0,
  174. boost: 1,
  175. lowerBound,
  176. passesThreshold,
  177. weightedQuality,
  178. }
  179. }
  180. function rankVideoArticle(
  181. simNorm: number,
  182. lowerBound: number,
  183. passesThreshold: boolean,
  184. item: VideoMatchEnrichedVO,
  185. params: RankingParams,
  186. ): ScoreBreakdown {
  187. // WP2: 读 signals.rov,兼容旧 videoDetail.rov
  188. const rov = item.signals?.rov ?? undefined
  189. // 按维度独立 boost:优先取 boostsByCode[configCode],回退维度默认值,未知维度用 deconstructBoost
  190. const codeBoost = item.configCode
  191. ? (params.boostsByCode?.[item.configCode] ?? getDefaultBoostForCode(item.configCode))
  192. : params.deconstructBoost
  193. const hasRov = rov != null && Number.isFinite(rov)
  194. const boost = (item.modality === 'VIDEO' && hasRov) ? codeBoost : 1
  195. if (!hasRov) {
  196. if (item.modality === 'VIDEO') {
  197. // VIDEO 缺 ROV → rovNorm=0,alpha 公式生效,alpha 低时自然沉底
  198. const composite = boost * params.alpha * simNorm
  199. return { composite, simNorm, rovNorm: 0, boost, lowerBound, passesThreshold }
  200. }
  201. // ARTICLE 缺 ROV → 退化为纯 sim 排序
  202. const composite = boost * simNorm
  203. return { composite, simNorm, rovNorm: 0, boost, lowerBound, passesThreshold }
  204. }
  205. const rovDenom = params.rovClipHigh - params.rovClipLow
  206. const rovNorm = rovDenom > 0 ? clip01((rov - params.rovClipLow) / rovDenom) : 0
  207. const composite = boost * (params.alpha * simNorm + (1 - params.alpha) * rovNorm)
  208. return { composite, simNorm, rovNorm, boost, lowerBound, passesThreshold, weightedQuality: rovNorm }
  209. }
  210. const STORAGE_KEY = 'vector_recall_ranking_params'
  211. /** 维度 boost 默认值变更版本;升级后清理旧版 localStorage 脏数据 */
  212. const RANKING_STORAGE_VERSION = 2
  213. const RANKING_VERSION_KEY = 'vector_recall_ranking_params_version'
  214. /** 旧版 UI 常见落盘值(deconstructBoost 兜底 / 批量微调),不等于新版维度默认时应清除 */
  215. const LEGACY_BOOST_SNAPSHOTS = new Set([0.55, 0.6, 0.65, 1.0])
  216. function clampBoost(v: number): number {
  217. return Math.max(BOOST_MIN, Math.min(BOOST_MAX, v))
  218. }
  219. function sanitizeBoostsByCode(boosts: Record<string, number> | undefined): Record<string, number> {
  220. if (!boosts) return {}
  221. const next: Record<string, number> = {}
  222. for (const [code, val] of Object.entries(boosts)) {
  223. if (!Number.isFinite(val)) continue
  224. const expected = getDefaultBoostForCode(code)
  225. // 保留用户真实自定义;清除旧版自动落盘的 0.6/0.65 等快照
  226. if (val === expected || !LEGACY_BOOST_SNAPSHOTS.has(val)) {
  227. next[code] = clampBoost(val)
  228. }
  229. }
  230. return next
  231. }
  232. function migrateDeconstructBoost(v: unknown): number {
  233. if (typeof v !== 'number' || !Number.isFinite(v)) return DEFAULT_RANKING_PARAMS.deconstructBoost
  234. // 旧版默认 1.0 → 新版默认 0.4
  235. if (v === 1.0) return DEFAULT_RANKING_PARAMS.deconstructBoost
  236. return clampBoost(v)
  237. }
  238. function loadFromStorage(): RankingParams {
  239. try {
  240. const raw = localStorage.getItem(STORAGE_KEY)
  241. if (!raw) return DEFAULT_RANKING_PARAMS
  242. const parsed = JSON.parse(raw) as Record<string, unknown>
  243. const storedVersion = Number(localStorage.getItem(RANKING_VERSION_KEY) || 0)
  244. const needsBoostMigration = storedVersion < RANKING_STORAGE_VERSION
  245. // WP2 迁移:rovP5/rovP95 → rovClipLow/rovClipHigh
  246. if (parsed.rovClipLow === undefined && typeof parsed.rovP5 === 'number') {
  247. parsed.rovClipLow = parsed.rovP5
  248. }
  249. if (parsed.rovClipHigh === undefined && typeof parsed.rovP95 === 'number') {
  250. parsed.rovClipHigh = parsed.rovP95
  251. }
  252. // 清理已迁移/废弃的字段,避免脏数据残留
  253. delete parsed.rovP5
  254. delete parsed.rovP95
  255. delete parsed.wSim
  256. const boostsByCode = needsBoostMigration
  257. ? sanitizeBoostsByCode(parsed.boostsByCode as Record<string, number> | undefined)
  258. : ((parsed.boostsByCode as Record<string, number>) ?? {})
  259. const params = {
  260. ...DEFAULT_RANKING_PARAMS,
  261. ...parsed,
  262. boostsByCode,
  263. deconstructBoost: needsBoostMigration
  264. ? migrateDeconstructBoost(parsed.deconstructBoost)
  265. : clampBoost(
  266. typeof parsed.deconstructBoost === 'number'
  267. ? parsed.deconstructBoost
  268. : DEFAULT_RANKING_PARAMS.deconstructBoost,
  269. ),
  270. simThresholdsByCode:
  271. (parsed.simThresholdsByCode as Record<string, number>) ?? {},
  272. } as RankingParams
  273. if (needsBoostMigration) {
  274. localStorage.setItem(RANKING_VERSION_KEY, String(RANKING_STORAGE_VERSION))
  275. }
  276. return params
  277. } catch {
  278. return DEFAULT_RANKING_PARAMS
  279. }
  280. }
  281. function saveToStorage(p: RankingParams) {
  282. try {
  283. localStorage.setItem(STORAGE_KEY, JSON.stringify(p))
  284. localStorage.setItem(RANKING_VERSION_KEY, String(RANKING_STORAGE_VERSION))
  285. } catch {
  286. // localStorage 失败时静默,当次会话仍可用
  287. }
  288. }
  289. /** 展开 boostsByCode:未单独配置的 configCode 使用维度默认值 */
  290. export function expandRankingBoosts(r: RankingParams, codes: string[]): RankingParams {
  291. const expanded: Record<string, number> = { ...r.boostsByCode }
  292. for (const code of codes) {
  293. if (!(code in expanded)) expanded[code] = getDefaultBoostForCode(code)
  294. }
  295. return { ...r, boostsByCode: expanded }
  296. }
  297. /** 召回请求用精排参数:展开维度 boost + 完整字段 */
  298. export function rankingForRequest(r: RankingParams, codes: string[]): RankingParams {
  299. return toRankingPayload(expandRankingBoosts(r, codes))
  300. }
  301. /** 随召回请求提交的精排参数(字段与后端 RankingSpec 对齐) */
  302. export function toRankingPayload(params: RankingParams): RankingParams {
  303. return {
  304. simThreshold: params.simThreshold,
  305. simThresholdsByCode: params.simThresholdsByCode ?? {},
  306. rovClipLow: params.rovClipLow,
  307. rovClipHigh: params.rovClipHigh,
  308. alpha: params.alpha,
  309. deconstructBoost: params.deconstructBoost,
  310. boostsByCode: params.boostsByCode ?? {},
  311. wCtr: params.wCtr,
  312. wViral: params.wViral,
  313. wRoi: params.wRoi,
  314. materialMissingStrategy: params.materialMissingStrategy,
  315. }
  316. }
  317. export function useRankingParams(): [RankingParams, (next: RankingParams) => void] {
  318. const [params, setParams] = useState<RankingParams>(() => loadFromStorage())
  319. useEffect(() => {
  320. saveToStorage(params)
  321. }, [params])
  322. return [params, setParams]
  323. }