scoring.ts 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. import { useEffect, useState } from 'react'
  2. import type { RecallSignals, VideoMatchEnrichedVO } from '../api/types'
  3. /**
  4. * 精排参数——前后端同构的单一来源。
  5. *
  6. * 公式 (VIDEO/ARTICLE):
  7. * sim_norm = clip((sim - lower) / (1 - lower), 0, 1)
  8. * rov_norm = clip((rov - rovClipLow) / (rovClipHigh - rovClipLow), 0, 1)
  9. * composite = boost × (alpha × sim_norm + (1 - alpha) × rov_norm)
  10. * boost = deconstructBoost(仅 VIDEO 模态生效,按 modality 判定)
  11. *
  12. * 公式 (MATERIAL):
  13. * sim_norm = clip((sim - simThreshold) / (1 - simThreshold), 0, 1)
  14. * qualityScore = (wCtr × ctr + wViral × viral + wRoi × roi) / qualTotalW
  15. * composite = alpha × sim_norm + (1 - alpha) × qualityScore
  16. * 质量缺失时按 materialMissingStrategy 处理:"group"(分组)或 "shrink"(收缩)
  17. */
  18. export interface RankingParams {
  19. simThreshold: number
  20. simThresholdsByCode: Record<string, number>
  21. /** ROV 归一化下界(clip 低值) */
  22. rovClipLow: number
  23. /** ROV 归一化上界(clip 高值) */
  24. rovClipHigh: number
  25. /** 相关性 VS 质量的权衡权重 [0, 1],VIDEO/ARTICLE/MATERIAL 通用 */
  26. alpha: number
  27. /** 解构维度加权(兜底,未在 boostsByCode 中配置的维度使用此值) */
  28. deconstructBoost: number
  29. /** 按维度独立 boost —— 每个 configCode 可单独设置,覆盖 deconstructBoost */
  30. boostsByCode: Record<string, number>
  31. /** 素材质量子维度权重——打开率,默认 0.5(与 wViral/wRoi 之和为 1) */
  32. wCtr: number
  33. /** 素材质量子维度权重——裂变率,默认 0.3 */
  34. wViral: number
  35. /** 素材质量子维度权重——ROI,默认 0.2 */
  36. wRoi: number
  37. /** 素材质量缺失策略:"group" | "shrink" */
  38. materialMissingStrategy: 'group' | 'shrink'
  39. }
  40. export const DEFAULT_RANKING_PARAMS: RankingParams = {
  41. simThreshold: 0.65,
  42. simThresholdsByCode: {},
  43. boostsByCode: {},
  44. rovClipLow: 0,
  45. rovClipHigh: 0.07,
  46. alpha: 0.6,
  47. deconstructBoost: 1.0,
  48. wCtr: 0.5,
  49. wViral: 0.3,
  50. wRoi: 0.2,
  51. materialMissingStrategy: 'group',
  52. }
  53. export interface ScoreBreakdown {
  54. composite: number
  55. simNorm: number
  56. rovNorm: number
  57. boost: number
  58. lowerBound: number
  59. passesThreshold: boolean
  60. /** 精排加权质量分:素材=(wCtr·ctr+wViral·viral+wRoi·roi)/Σw;视频=rov_norm */
  61. weightedQuality?: number
  62. /** 素材质量缺失时置 true,调用方按策略单独成组 */
  63. qualityMissing?: boolean
  64. }
  65. const clip01 = (x: number) => Math.max(0, Math.min(1, x))
  66. /** signals.quality 缺失时,从 materialDetail.quality 构造质量信号 */
  67. function materialQualityFromDetail(
  68. item: VideoMatchEnrichedVO,
  69. ): RecallSignals['quality'] | undefined {
  70. const q = item.materialDetail?.quality
  71. if (!q) return undefined
  72. const ctr = q.conversionEfficiencyScore
  73. const viral = q.viralScore
  74. const roi = q.revenueScore
  75. const hasData = [ctr, viral, roi].some((v) => v != null && Number.isFinite(v))
  76. if (!hasData) return undefined
  77. return {
  78. hasData: true,
  79. ctr: ctr ?? null,
  80. viral: viral ?? null,
  81. roi: roi ?? null,
  82. }
  83. }
  84. export function effectiveSimThreshold(
  85. configCode: string | null | undefined,
  86. params: RankingParams,
  87. ): number {
  88. if (configCode && configCode in params.simThresholdsByCode) {
  89. return params.simThresholdsByCode[configCode]
  90. }
  91. return params.simThreshold
  92. }
  93. /**
  94. * 计算单条召回结果的综合得分——WP2 前后端同构版本。
  95. *
  96. * 关键修正:
  97. * - 读 signals 而非散落字段(sim/rov/quality)
  98. * - deconstructBoost 按 modality===VIDEO 判定,不按 configCode.startsWith("VIDEO_")
  99. * - ARTICLE 无 rov 时退化为纯 sim 排序
  100. * - MATERIAL 质量缺失按 signals.quality.hasData 统一判定,不再回退 0.5
  101. */
  102. export function computeCompositeScore(
  103. item: VideoMatchEnrichedVO,
  104. params: RankingParams,
  105. ): ScoreBreakdown | null {
  106. // WP2: 读 signals.sim,兼容旧 score 字段
  107. const sim = item.signals?.sim ?? item.score
  108. if (sim == null || !Number.isFinite(sim)) return null
  109. const lowerBound = effectiveSimThreshold(item.configCode, params)
  110. const denom = 1 - lowerBound
  111. const simNorm = denom > 0 ? clip01((sim - lowerBound) / denom) : 0
  112. const passesThreshold = sim >= lowerBound
  113. // 素材模态:多维质量加权
  114. if (item.modality === 'MATERIAL') {
  115. const quality = item.signals?.quality ?? materialQualityFromDetail(item)
  116. return rankMaterial(simNorm, lowerBound, passesThreshold, quality, params)
  117. }
  118. // VIDEO / ARTICLE 模态:ROV 公式
  119. return rankVideoArticle(simNorm, lowerBound, passesThreshold, item, params)
  120. }
  121. function rankMaterial(
  122. simNorm: number,
  123. lowerBound: number,
  124. passesThreshold: boolean,
  125. quality: RecallSignals['quality'] | undefined,
  126. params: RankingParams,
  127. ): ScoreBreakdown {
  128. const alpha = params.alpha
  129. if (quality == null || !quality.hasData) {
  130. // group(默认):无质量数据,仅依赖相关性
  131. if (params.materialMissingStrategy === 'group') {
  132. return {
  133. composite: alpha * simNorm,
  134. simNorm,
  135. rovNorm: 0,
  136. boost: 1,
  137. lowerBound,
  138. passesThreshold,
  139. qualityMissing: true,
  140. }
  141. }
  142. // shrink: 无先验均值时退化为 alpha × simNorm
  143. return {
  144. composite: alpha * simNorm,
  145. simNorm,
  146. rovNorm: 0,
  147. boost: 1,
  148. lowerBound,
  149. passesThreshold,
  150. }
  151. }
  152. const ctr = quality.ctr ?? 0
  153. const viral = quality.viral ?? 0
  154. const roi = quality.roi ?? 0
  155. const qualTotalW = params.wCtr + params.wViral + params.wRoi || 1
  156. const weightedQuality = (params.wCtr * ctr + params.wViral * viral + params.wRoi * roi) / qualTotalW
  157. const composite = alpha * simNorm + (1 - alpha) * weightedQuality
  158. return {
  159. composite,
  160. simNorm,
  161. rovNorm: 0,
  162. boost: 1,
  163. lowerBound,
  164. passesThreshold,
  165. weightedQuality,
  166. }
  167. }
  168. function rankVideoArticle(
  169. simNorm: number,
  170. lowerBound: number,
  171. passesThreshold: boolean,
  172. item: VideoMatchEnrichedVO,
  173. params: RankingParams,
  174. ): ScoreBreakdown {
  175. // WP2: 读 signals.rov,兼容旧 videoDetail.rov
  176. const rov = item.signals?.rov ?? undefined
  177. // 按维度独立 boost:优先取 boostsByCode[configCode],回退 deconstructBoost
  178. const codeBoost = item.configCode ? (params.boostsByCode?.[item.configCode] ?? params.deconstructBoost) : params.deconstructBoost
  179. const hasRov = rov != null && Number.isFinite(rov)
  180. const boost = (item.modality === 'VIDEO' && hasRov) ? codeBoost : 1
  181. if (!hasRov) {
  182. const composite = boost * simNorm
  183. return { composite, simNorm, rovNorm: 0, boost, lowerBound, passesThreshold }
  184. }
  185. const rovDenom = params.rovClipHigh - params.rovClipLow
  186. const rovNorm = rovDenom > 0 ? clip01((rov - params.rovClipLow) / rovDenom) : 0
  187. const composite = boost * (params.alpha * simNorm + (1 - params.alpha) * rovNorm)
  188. return { composite, simNorm, rovNorm, boost, lowerBound, passesThreshold, weightedQuality: rovNorm }
  189. }
  190. const STORAGE_KEY = 'vector_recall_ranking_params'
  191. function loadFromStorage(): RankingParams {
  192. try {
  193. const raw = localStorage.getItem(STORAGE_KEY)
  194. if (!raw) return DEFAULT_RANKING_PARAMS
  195. const parsed = JSON.parse(raw) as Record<string, unknown>
  196. // WP2 迁移:rovP5/rovP95 → rovClipLow/rovClipHigh
  197. if (parsed.rovClipLow === undefined && typeof parsed.rovP5 === 'number') {
  198. parsed.rovClipLow = parsed.rovP5
  199. }
  200. if (parsed.rovClipHigh === undefined && typeof parsed.rovP95 === 'number') {
  201. parsed.rovClipHigh = parsed.rovP95
  202. }
  203. // 清理已迁移/废弃的字段,避免脏数据残留
  204. delete parsed.rovP5
  205. delete parsed.rovP95
  206. delete parsed.wSim
  207. return {
  208. ...DEFAULT_RANKING_PARAMS,
  209. ...parsed,
  210. simThresholdsByCode:
  211. (parsed.simThresholdsByCode as Record<string, number>) ?? {},
  212. } as RankingParams
  213. } catch {
  214. return DEFAULT_RANKING_PARAMS
  215. }
  216. }
  217. function saveToStorage(p: RankingParams) {
  218. try {
  219. localStorage.setItem(STORAGE_KEY, JSON.stringify(p))
  220. } catch {
  221. // localStorage 失败时静默,当次会话仍可用
  222. }
  223. }
  224. /** 展开 boostsByCode:未单独配置的 configCode 使用 deconstructBoost */
  225. export function expandRankingBoosts(r: RankingParams, codes: string[]): RankingParams {
  226. const expanded: Record<string, number> = { ...r.boostsByCode }
  227. for (const code of codes) {
  228. if (!(code in expanded)) expanded[code] = r.deconstructBoost
  229. }
  230. return { ...r, boostsByCode: expanded }
  231. }
  232. /** 召回请求用精排参数:展开维度 boost + 完整字段 */
  233. export function rankingForRequest(r: RankingParams, codes: string[]): RankingParams {
  234. return toRankingPayload(expandRankingBoosts(r, codes))
  235. }
  236. /** 随召回请求提交的精排参数(字段与后端 RankingSpec 对齐) */
  237. export function toRankingPayload(params: RankingParams): RankingParams {
  238. return {
  239. simThreshold: params.simThreshold,
  240. simThresholdsByCode: params.simThresholdsByCode ?? {},
  241. rovClipLow: params.rovClipLow,
  242. rovClipHigh: params.rovClipHigh,
  243. alpha: params.alpha,
  244. deconstructBoost: params.deconstructBoost,
  245. boostsByCode: params.boostsByCode ?? {},
  246. wCtr: params.wCtr,
  247. wViral: params.wViral,
  248. wRoi: params.wRoi,
  249. materialMissingStrategy: params.materialMissingStrategy,
  250. }
  251. }
  252. export function useRankingParams(): [RankingParams, (next: RankingParams) => void] {
  253. const [params, setParams] = useState<RankingParams>(() => loadFromStorage())
  254. useEffect(() => {
  255. saveToStorage(params)
  256. }, [params])
  257. return [params, setParams]
  258. }