scoring.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. import { useEffect, useState } from 'react'
  2. import type { RecallSignals, VideoMatchEnrichedVO } from '../api/types'
/**
 * Re-ranking parameters — the single source of truth shared by frontend and backend.
 *
 * In the formulas below, `lower` is the effective similarity threshold: the
 * per-code override from `simThresholdsByCode` when one exists, otherwise
 * `simThreshold`.
 *
 * Formula (VIDEO):
 *   sim_norm  = clip((sim - lower) / (1 - lower), 0, 1)
 *   rov_norm  = clip((rov - rovClipLow) / (rovClipHigh - rovClipLow), 0, 1)
 *   composite = alpha × boost × sim_norm + (1 - alpha) × rov_norm
 *   boost applies only to the relevance term (it is the per-dimension weight);
 *   ROV is a per-video signal and is not boosted.
 *   When ROV is missing: composite = alpha × boost × sim_norm
 *
 * Formula (ARTICLE):
 *   sim_norm = clip((sim - lower) / (1 - lower), 0, 1)
 *   When ROV is missing the ranking degrades to pure similarity: composite = sim_norm
 *   NOTE(review): the scoring code in this file returns alpha × boost × sim_norm
 *   for every non-MATERIAL item without ROV, which orders items like sim_norm
 *   only when they share the same boost — confirm which one is intended.
 *
 * Formula (MATERIAL):
 *   sim_norm     = clip((sim - simThreshold) / (1 - simThreshold), 0, 1)
 *   qualityScore = (wCtr × ctr + wViral × viral + wRoi × roi) / qualTotalW
 *   composite    = alpha × boost × sim_norm + (1 - alpha) × qualityScore
 *   Missing quality is handled according to materialMissingStrategy:
 *   "group" (separate group) or "shrink".
 *   boost applies only to the relevance term.
 */
export interface RankingParams {
  /** Global similarity lower bound, used when no per-code override exists. */
  simThreshold: number
  /** Per-configCode similarity lower bounds that take precedence over `simThreshold`. */
  simThresholdsByCode: Record<string, number>
  /** Lower bound of ROV normalisation (low clip value). */
  rovClipLow: number
  /** Upper bound of ROV normalisation (high clip value). */
  rovClipHigh: number
  /** Relevance-vs-quality trade-off weight in [0, 1]; shared by VIDEO/ARTICLE/MATERIAL. */
  alpha: number
  /** Fallback boost: used only when configCode is missing; known dimensions go through getDefaultBoostForCode. */
  deconstructBoost: number
  /** Per-dimension boost — each configCode can be set individually and overrides the dimension default. */
  boostsByCode: Record<string, number>
  /** Material quality sub-weight — open rate, default 0.5 (sums to 1 with wViral/wRoi). */
  wCtr: number
  /** Material quality sub-weight — viral rate, default 0.3. */
  wViral: number
  /** Material quality sub-weight — ROI, default 0.2. */
  wRoi: number
  /** Strategy for materials without quality data: "group" | "shrink". */
  materialMissingStrategy: 'group' | 'shrink'
}
  46. /** 维度 boost 取值范围 */
  47. export const BOOST_MIN = 0.1
  48. export const BOOST_MAX = 1
  49. const TOPIC_CONFIG_CODE = 'VIDEO_TOPIC'
  50. /** 各维度默认 boost:选题 1,其余 0.4 */
  51. export function getDefaultBoostForCode(code: string): number {
  52. return code === TOPIC_CONFIG_CODE ? 1 : 0.4
  53. }
/**
 * Default ranking parameters. Used as the base when nothing usable is found in
 * localStorage and as the per-field fallback during load/migration.
 */
export const DEFAULT_RANKING_PARAMS: RankingParams = {
  simThreshold: 0.65,
  // No per-code overrides by default: every code uses simThreshold / the dimension default boost.
  simThresholdsByCode: {},
  boostsByCode: {},
  rovClipLow: 0,
  rovClipHigh: 0.07,
  alpha: 0.6,
  // Same value getDefaultBoostForCode returns for non-topic dimensions.
  deconstructBoost: 0.4,
  // The three quality sub-weights sum to 1.
  wCtr: 0.5,
  wViral: 0.3,
  wRoi: 0.2,
  materialMissingStrategy: 'group',
}
/** Per-item result of the composite score computation, with its intermediate terms. */
export interface ScoreBreakdown {
  /** Final composite score. */
  composite: number
  /** Similarity normalised against the effective lower bound, clipped to [0, 1]. */
  simNorm: number
  /** Normalised ROV; 0 when ROV is missing and for MATERIAL items. */
  rovNorm: number
  /** Boost that was applied to the relevance term. */
  boost: number
  /** Effective similarity threshold used for this item. */
  lowerBound: number
  /** Whether the raw similarity is at or above `lowerBound`. */
  passesThreshold: boolean
  /** Weighted quality used in ranking: material = (wCtr·ctr + wViral·viral + wRoi·roi) / Σw; video = rov_norm. */
  weightedQuality?: number
  /** Set to true when material quality is missing, so the caller can group such items separately per strategy. */
  qualityMissing?: boolean
}
  79. const clip01 = (x: number) => Math.max(0, Math.min(1, x))
  80. /** signals.quality 缺失时,从 materialDetail.quality 构造质量信号 */
  81. function materialQualityFromDetail(
  82. item: VideoMatchEnrichedVO,
  83. ): RecallSignals['quality'] | undefined {
  84. const q = item.materialDetail?.quality
  85. if (!q) return undefined
  86. const ctr = q.conversionEfficiencyScore
  87. const viral = q.viralScore
  88. const roi = q.revenueScore
  89. const hasData = [ctr, viral, roi].some((v) => v != null && Number.isFinite(v))
  90. if (!hasData) return undefined
  91. return {
  92. hasData: true,
  93. ctr: ctr ?? null,
  94. viral: viral ?? null,
  95. roi: roi ?? null,
  96. }
  97. }
  98. export function effectiveSimThreshold(
  99. configCode: string | null | undefined,
  100. params: RankingParams,
  101. ): number {
  102. if (configCode && configCode in params.simThresholdsByCode) {
  103. return params.simThresholdsByCode[configCode]
  104. }
  105. return params.simThreshold
  106. }
  107. /**
  108. * 计算单条召回结果的综合得分——WP2 前后端同构版本。
  109. *
  110. * 关键修正:
  111. * - 读 signals 而非散落字段(sim/rov/quality)
  112. * - boost 按维度独立:boostsByCode[configCode] → getDefaultBoostForCode → deconstructBoost(兜底)
  113. * - ARTICLE 无 rov 时退化为纯 sim 排序
  114. * - MATERIAL 质量缺失按 signals.quality.hasData 统一判定,不再回退 0.5
  115. */
  116. export function computeCompositeScore(
  117. item: VideoMatchEnrichedVO,
  118. params: RankingParams,
  119. ): ScoreBreakdown | null {
  120. // WP2: 读 signals.sim,兼容旧 score 字段
  121. const sim = item.signals?.sim ?? item.score
  122. if (sim == null || !Number.isFinite(sim)) return null
  123. const lowerBound = effectiveSimThreshold(item.configCode, params)
  124. const denom = 1 - lowerBound
  125. const simNorm = denom > 0 ? clip01((sim - lowerBound) / denom) : 0
  126. const passesThreshold = sim >= lowerBound
  127. // 素材模态:多维质量加权
  128. if (item.modality === 'MATERIAL') {
  129. const quality = item.signals?.quality ?? materialQualityFromDetail(item)
  130. return rankMaterial(simNorm, lowerBound, passesThreshold, quality, item, params)
  131. }
  132. // VIDEO / ARTICLE 模态:ROV 公式
  133. return rankVideoArticle(simNorm, lowerBound, passesThreshold, item, params)
  134. }
  135. function rankMaterial(
  136. simNorm: number,
  137. lowerBound: number,
  138. passesThreshold: boolean,
  139. quality: RecallSignals['quality'] | undefined,
  140. item: VideoMatchEnrichedVO,
  141. params: RankingParams,
  142. ): ScoreBreakdown {
  143. const alpha = params.alpha
  144. // boost 仅作用于相关性分,质量分不加 boost
  145. const codeBoost = item.configCode
  146. ? (params.boostsByCode?.[item.configCode] ?? getDefaultBoostForCode(item.configCode))
  147. : params.deconstructBoost
  148. if (quality == null || !quality.hasData) {
  149. // group(默认):无质量数据,仅依赖相关性
  150. if (params.materialMissingStrategy === 'group') {
  151. return {
  152. composite: alpha * codeBoost * simNorm,
  153. simNorm,
  154. rovNorm: 0,
  155. boost: codeBoost,
  156. lowerBound,
  157. passesThreshold,
  158. qualityMissing: true,
  159. }
  160. }
  161. // shrink: 无先验均值时退化为 alpha × boost × simNorm
  162. return {
  163. composite: alpha * codeBoost * simNorm,
  164. simNorm,
  165. rovNorm: 0,
  166. boost: codeBoost,
  167. lowerBound,
  168. passesThreshold,
  169. }
  170. }
  171. const ctr = quality.ctr ?? 0
  172. const viral = quality.viral ?? 0
  173. const roi = quality.roi ?? 0
  174. const qualTotalW = params.wCtr + params.wViral + params.wRoi || 1
  175. const weightedQuality = (params.wCtr * ctr + params.wViral * viral + params.wRoi * roi) / qualTotalW
  176. const composite = alpha * codeBoost * simNorm + (1 - alpha) * weightedQuality
  177. return {
  178. composite,
  179. simNorm,
  180. rovNorm: 0,
  181. boost: codeBoost,
  182. lowerBound,
  183. passesThreshold,
  184. weightedQuality,
  185. }
  186. }
  187. function rankVideoArticle(
  188. simNorm: number,
  189. lowerBound: number,
  190. passesThreshold: boolean,
  191. item: VideoMatchEnrichedVO,
  192. params: RankingParams,
  193. ): ScoreBreakdown {
  194. // WP2: 读 signals.rov,兼容旧 videoDetail.rov
  195. const rov = item.signals?.rov ?? undefined
  196. // 按维度独立 boost:优先取 boostsByCode[configCode],回退维度默认值,未知维度用 deconstructBoost
  197. const codeBoost = item.configCode
  198. ? (params.boostsByCode?.[item.configCode] ?? getDefaultBoostForCode(item.configCode))
  199. : params.deconstructBoost
  200. const hasRov = rov != null && Number.isFinite(rov)
  201. if (!hasRov) {
  202. const composite = codeBoost * params.alpha * simNorm
  203. return { composite, simNorm, rovNorm: 0, boost: codeBoost, lowerBound, passesThreshold }
  204. }
  205. const rovDenom = params.rovClipHigh - params.rovClipLow
  206. const rovNorm = rovDenom > 0 ? clip01((rov - params.rovClipLow) / rovDenom) : 0
  207. const composite = params.alpha * codeBoost * simNorm + (1 - params.alpha) * rovNorm
  208. return { composite, simNorm, rovNorm, boost: codeBoost, lowerBound, passesThreshold, weightedQuality: rovNorm }
  209. }
/** localStorage key under which the serialized ranking parameters are stored. */
const STORAGE_KEY = 'vector_recall_ranking_params'
/** Version of the per-dimension boost defaults; after an upgrade, stale localStorage data from older versions is cleaned up. */
const RANKING_STORAGE_VERSION = 2
/** localStorage key holding the version the stored parameters were written with. */
const RANKING_VERSION_KEY = 'vector_recall_ranking_params_version'
/** Values the old UI commonly persisted (deconstructBoost fallback / bulk tweaks); they are dropped when they differ from the new dimension default. */
const LEGACY_BOOST_SNAPSHOTS = new Set([0.55, 0.6, 0.65, 1.0])
  216. function clampBoost(v: number): number {
  217. return Math.max(BOOST_MIN, Math.min(BOOST_MAX, v))
  218. }
  219. function sanitizeBoostsByCode(boosts: Record<string, number> | undefined): Record<string, number> {
  220. if (!boosts) return {}
  221. const next: Record<string, number> = {}
  222. for (const [code, val] of Object.entries(boosts)) {
  223. if (!Number.isFinite(val)) continue
  224. const expected = getDefaultBoostForCode(code)
  225. // 保留用户真实自定义;清除旧版自动落盘的 0.6/0.65 等快照
  226. if (val === expected || !LEGACY_BOOST_SNAPSHOTS.has(val)) {
  227. next[code] = clampBoost(val)
  228. }
  229. }
  230. return next
  231. }
  232. function migrateDeconstructBoost(v: unknown): number {
  233. if (typeof v !== 'number' || !Number.isFinite(v)) return DEFAULT_RANKING_PARAMS.deconstructBoost
  234. // 旧版默认 1.0 → 新版默认 0.4
  235. if (v === 1.0) return DEFAULT_RANKING_PARAMS.deconstructBoost
  236. return clampBoost(v)
  237. }
  238. function loadFromStorage(): RankingParams {
  239. try {
  240. const raw = localStorage.getItem(STORAGE_KEY)
  241. if (!raw) return DEFAULT_RANKING_PARAMS
  242. const parsed = JSON.parse(raw) as Record<string, unknown>
  243. const storedVersion = Number(localStorage.getItem(RANKING_VERSION_KEY) || 0)
  244. const needsBoostMigration = storedVersion < RANKING_STORAGE_VERSION
  245. // WP2 迁移:rovP5/rovP95 → rovClipLow/rovClipHigh
  246. if (parsed.rovClipLow === undefined && typeof parsed.rovP5 === 'number') {
  247. parsed.rovClipLow = parsed.rovP5
  248. }
  249. if (parsed.rovClipHigh === undefined && typeof parsed.rovP95 === 'number') {
  250. parsed.rovClipHigh = parsed.rovP95
  251. }
  252. // 清理已迁移/废弃的字段,避免脏数据残留
  253. delete parsed.rovP5
  254. delete parsed.rovP95
  255. delete parsed.wSim
  256. const boostsByCode = needsBoostMigration
  257. ? sanitizeBoostsByCode(parsed.boostsByCode as Record<string, number> | undefined)
  258. : ((parsed.boostsByCode as Record<string, number>) ?? {})
  259. const params = {
  260. ...DEFAULT_RANKING_PARAMS,
  261. ...parsed,
  262. boostsByCode,
  263. deconstructBoost: needsBoostMigration
  264. ? migrateDeconstructBoost(parsed.deconstructBoost)
  265. : clampBoost(
  266. typeof parsed.deconstructBoost === 'number'
  267. ? parsed.deconstructBoost
  268. : DEFAULT_RANKING_PARAMS.deconstructBoost,
  269. ),
  270. simThresholdsByCode:
  271. (parsed.simThresholdsByCode as Record<string, number>) ?? {},
  272. } as RankingParams
  273. if (needsBoostMigration) {
  274. localStorage.setItem(RANKING_VERSION_KEY, String(RANKING_STORAGE_VERSION))
  275. }
  276. return params
  277. } catch {
  278. return DEFAULT_RANKING_PARAMS
  279. }
  280. }
  281. function saveToStorage(p: RankingParams) {
  282. try {
  283. localStorage.setItem(STORAGE_KEY, JSON.stringify(p))
  284. localStorage.setItem(RANKING_VERSION_KEY, String(RANKING_STORAGE_VERSION))
  285. } catch {
  286. // localStorage 失败时静默,当次会话仍可用
  287. }
  288. }
  289. /** 展开 boostsByCode:未单独配置的 configCode 使用维度默认值 */
  290. export function expandRankingBoosts(r: RankingParams, codes: string[]): RankingParams {
  291. const expanded: Record<string, number> = { ...r.boostsByCode }
  292. for (const code of codes) {
  293. if (!(code in expanded)) expanded[code] = getDefaultBoostForCode(code)
  294. }
  295. return { ...r, boostsByCode: expanded }
  296. }
  297. /** 召回请求用精排参数:展开维度 boost + 完整字段 */
  298. export function rankingForRequest(r: RankingParams, codes: string[]): RankingParams {
  299. return toRankingPayload(expandRankingBoosts(r, codes))
  300. }
  301. /** 随召回请求提交的精排参数(字段与后端 RankingSpec 对齐) */
  302. export function toRankingPayload(params: RankingParams): RankingParams {
  303. return {
  304. simThreshold: params.simThreshold,
  305. simThresholdsByCode: params.simThresholdsByCode ?? {},
  306. rovClipLow: params.rovClipLow,
  307. rovClipHigh: params.rovClipHigh,
  308. alpha: params.alpha,
  309. deconstructBoost: params.deconstructBoost,
  310. boostsByCode: params.boostsByCode ?? {},
  311. wCtr: params.wCtr,
  312. wViral: params.wViral,
  313. wRoi: params.wRoi,
  314. materialMissingStrategy: params.materialMissingStrategy,
  315. }
  316. }
  317. export function useRankingParams(): [RankingParams, (next: RankingParams) => void] {
  318. const [params, setParams] = useState<RankingParams>(() => loadFromStorage())
  319. useEffect(() => {
  320. saveToStorage(params)
  321. }, [params])
  322. return [params, setParams]
  323. }