// scoring.ts
  1. import { useEffect, useState } from 'react'
  2. import type { RecallSignals, VideoMatchEnrichedVO } from '../api/types'
  /**
   * Fine-ranking parameters. The original notes describe this as the single
   * source of truth mirrored between frontend and backend.
   *
   * In every formula `lower` is the per-configCode similarity threshold when one
   * is configured, otherwise `simThreshold`, and `boost` multiplies only the
   * relevance term.
   *
   * VIDEO:
   *   sim_norm  = clip((sim - lower) / (1 - lower), 0, 1)
   *   rov_norm  = clip((rov - rovClipLow) / (rovClipHigh - rovClipLow), 0, 1)
   *   composite = alpha * boost * sim_norm + (1 - alpha) * rov_norm
   *   without ROV: composite = alpha * boost * sim_norm
   *
   * ARTICLE:
   *   sim_norm  = clip((sim - lower) / (1 - lower), 0, 1)
   *   quality   = (wRead * read + wOpen * open + wFission * fission)
   *               / (wRead + wOpen + wFission)
   *   composite = alpha * boost * sim_norm + (1 - alpha) * quality
   *   without complete quality data: composite = alpha * boost * sim_norm
   *
   * MATERIAL:
   *   sim_norm  = clip((sim - lower) / (1 - lower), 0, 1)
   *   quality   = wCtr * ctr + wCvr * cvr + wRoi * roi
   *               + wOpenRate * openRate + wFissionRate * fissionRate
   *   composite = alpha * boost * sim_norm + (1 - alpha) * quality
   *   without quality data: composite = alpha * boost * sim_norm
   *
   * NOTE(review): the original notes disagree on whether the material quality
   * inputs are raw efficiency ratios or percentile scores — confirm with the
   * backend before relying on either reading.
   */
  export interface RankingParams {
    // --- relevance -------------------------------------------------------
    /** Global similarity threshold; also the lower bound of sim normalisation. */
    simThreshold: number
    /** Per-configCode similarity thresholds that take precedence over `simThreshold`. */
    simThresholdsByCode: Record<string, number>
    /** Relevance-vs-quality trade-off weight in [0, 1]; shared by VIDEO / ARTICLE / MATERIAL. */
    alpha: number
    /** Fallback boost, used only when an item has no configCode. */
    deconstructBoost: number
    /** Per-configCode boost; overrides the dimension default for that code. */
    boostsByCode: Record<string, number>

    // --- video quality (ROV) ---------------------------------------------
    /** Lower clip value of ROV normalisation. */
    rovClipLow: number
    /** Upper clip value of ROV normalisation. */
    rovClipHigh: number

    // --- material quality weights (default 0.2 each) ----------------------
    /** Weight of the CTR component. */
    wCtr: number
    /** Weight of the CVR component. */
    wCvr: number
    /** Weight of the ROI component. */
    wRoi: number
    /** Weight of the mini-program open-rate component. */
    wOpenRate: number
    /** Weight of the T0 fission-rate component. */
    wFissionRate: number
    /** Strategy applied when a material has no quality data. */
    materialMissingStrategy: 'group' | 'shrink'

    // --- article quality weights -----------------------------------------
    /** Weight of the read score (default 0.4). */
    wRead: number
    /** Weight of the open-rate score (default 0.3). */
    wOpen: number
    /** Weight of the fission-rate score (default 0.3). */
    wFission: number
  }
  /** Smallest value a dimension boost may take. */
  export const BOOST_MIN = 0.1
  /** Largest value a dimension boost may take. */
  export const BOOST_MAX = 1
  /** configCode of the topic dimension — the only one whose default boost is 1. */
  const TOPIC_CONFIG_CODE = 'VIDEO_TOPIC'

  /**
   * Default boost of a dimension.
   *
   * @param code configCode of the dimension
   * @returns 1 for the topic dimension, 0.4 for every other code
   */
  export function getDefaultBoostForCode(code: string): number {
    if (code === TOPIC_CONFIG_CODE) return 1
    return 0.4
  }
  /**
   * Ranking parameters used when nothing usable is stored in localStorage.
   * Key order is kept stable because it is visible in the serialised JSON.
   */
  export const DEFAULT_RANKING_PARAMS: RankingParams = {
    // Relevance thresholds and per-dimension overrides (none by default).
    simThreshold: 0.65,
    simThresholdsByCode: {},
    boostsByCode: {},

    // ROV clip window for video quality.
    rovClipLow: 0,
    rovClipHigh: 0.07,

    // Relevance share of the composite and the boost for items without a configCode.
    alpha: 0.6,
    deconstructBoost: 0.4,

    // Material quality weights (sum to 1).
    wCtr: 0.2,
    wCvr: 0.2,
    wRoi: 0.2,
    wOpenRate: 0.2,
    wFissionRate: 0.2,
    materialMissingStrategy: 'group',

    // Article quality weights (sum to 1).
    wRead: 0.4,
    wOpen: 0.3,
    wFission: 0.3,
  }
  /** Composite score of one recall result together with the terms it was built from. */
  export interface ScoreBreakdown {
    /** Final ranking score. */
    composite: number
    /** Similarity normalised into [0, 1] above `lowerBound`. */
    simNorm: number
    /** Normalised ROV; 0 whenever ROV is not part of the formula or is missing. */
    rovNorm: number
    /** Boost that was applied to the relevance term. */
    boost: number
    /** Similarity threshold that was in effect for this item. */
    lowerBound: number
    /** Whether the raw similarity reached `lowerBound`. */
    passesThreshold: boolean
    /**
     * Quality term before it is multiplied by (1 - alpha): the weighted sum of the
     * material quality components, the weighted article quality score, or
     * `rovNorm` for videos. Absent when no quality term was used.
     */
    weightedQuality?: number
    /**
     * Set to true for materials without quality data under the 'group' strategy.
     * NOTE(review): the original comment says callers put such items in their
     * own group — confirm at the call sites.
     */
    qualityMissing?: boolean
  }
  /** Clamps `x` into [0, 1]; NaN is passed through unchanged. */
  const clip01 = (x: number): number => Math.min(1, Math.max(0, x))
  /**
   * Builds a quality signal from `item.materialDetail.quality`, for use when
   * `signals.quality` is absent.
   *
   * Every score is normalised to "finite number or null", so a NaN / Infinity
   * score can neither count as data nor leak into the weighted quality sum
   * computed downstream.
   *
   * @param item recall result whose material detail is inspected
   * @returns a quality signal with `hasData: true`, or `undefined` when the
   *          detail is missing or carries no finite score
   */
  function materialQualityFromDetail(
    item: VideoMatchEnrichedVO,
  ): RecallSignals['quality'] | undefined {
    const detail = item.materialDetail?.quality
    if (!detail) return undefined

    const finiteOrNull = (v: unknown): number | null =>
      typeof v === 'number' && Number.isFinite(v) ? v : null

    const ctr = finiteOrNull(detail.ctrScore)
    const cvr = finiteOrNull(detail.cvrScore)
    const viral = finiteOrNull(detail.fissionRateScore)
    const roi = finiteOrNull(detail.roiScore)
    const openRateScore = finiteOrNull(detail.openRateScore)

    // No usable score at all: report "no quality data" rather than a record of nulls.
    if ([ctr, cvr, viral, roi, openRateScore].every((v) => v === null)) return undefined

    return {
      hasData: true,
      ctr,
      cvr,
      viral,
      roi,
      openRateScore,
      // Article-only fields are not available from the material detail.
      readScore: null,
      openScore: null,
      fissionScore: null,
      totalRead: null,
      avgRead: null,
      openRate: null,
      fissionRate: null,
      publishCount: null,
    }
  }
  /**
   * Resolves the similarity threshold that applies to a configCode.
   *
   * A per-code override is used only when it is an own property of
   * `params.simThresholdsByCode` and a finite number. Inherited names such as
   * `toString`, a missing override map, or a non-finite override all fall back to
   * the global `params.simThreshold`.
   *
   * @param configCode dimension code of the item; may be absent
   * @param params     ranking parameters holding the global and per-code thresholds
   * @returns the threshold to use as the lower bound of similarity
   */
  export function effectiveSimThreshold(
    configCode: string | null | undefined,
    params: RankingParams,
  ): number {
    const overrides = params.simThresholdsByCode
    if (
      configCode &&
      overrides != null &&
      Object.prototype.hasOwnProperty.call(overrides, configCode)
    ) {
      const override = overrides[configCode]
      if (typeof override === 'number' && Number.isFinite(override)) return override
    }
    return params.simThreshold
  }
  /**
   * Computes the composite score of a single recall result.
   *
   * - Similarity is read from `signals.sim`, falling back to the legacy `score` field.
   * - The lower bound comes from `effectiveSimThreshold` for the item's configCode.
   * - MATERIAL items are scored with multi-dimensional quality, taken from
   *   `signals.quality` or, when that is absent, rebuilt from the material detail.
   * - Every other modality goes through the VIDEO / ARTICLE formula.
   *
   * @param item   recall result to score
   * @param params ranking parameters
   * @returns the score breakdown, or `null` when the item has no finite similarity
   */
  export function computeCompositeScore(
    item: VideoMatchEnrichedVO,
    params: RankingParams,
  ): ScoreBreakdown | null {
    const similarity = item.signals?.sim ?? item.score
    if (similarity == null || !Number.isFinite(similarity)) return null

    const lowerBound = effectiveSimThreshold(item.configCode, params)
    const span = 1 - lowerBound
    // A threshold of 1 or more leaves no room above it, so the normalised value is 0.
    let simNorm = 0
    if (span > 0) {
      simNorm = clip01((similarity - lowerBound) / span)
    }
    const passesThreshold = similarity >= lowerBound

    if (item.modality !== 'MATERIAL') {
      return rankVideoArticle(simNorm, lowerBound, passesThreshold, item, params)
    }

    const quality = item.signals?.quality ?? materialQualityFromDetail(item)
    return rankMaterial(simNorm, lowerBound, passesThreshold, quality, item, params)
  }
  /**
   * Scores a MATERIAL item.
   *
   *   composite = alpha * boost * simNorm + (1 - alpha) * weightedQuality
   *   weightedQuality = wCtr*ctr + wCvr*cvr + wRoi*roi
   *                     + wOpenRate*openRate + wFissionRate*viral
   *
   * The boost applies to the relevance term only. It is resolved as
   * `boostsByCode[configCode]`, then the dimension default, or
   * `deconstructBoost` when the item has no configCode.
   *
   * Without quality data the score is the relevance term alone under both
   * strategies; 'group' additionally sets `qualityMissing: true`.
   *
   * A missing or non-finite quality component contributes 0, so one bad value
   * cannot turn the whole composite into NaN.
   *
   * @param simNorm         similarity normalised into [0, 1]
   * @param lowerBound      similarity threshold in effect, echoed into the result
   * @param passesThreshold whether the raw similarity reached the threshold
   * @param quality         quality signal, if any
   * @param item            recall result being scored (only `configCode` is read)
   * @param params          ranking parameters
   * @returns the score breakdown; `rovNorm` is always 0 for materials
   */
  function rankMaterial(
    simNorm: number,
    lowerBound: number,
    passesThreshold: boolean,
    quality: RecallSignals['quality'] | undefined,
    item: VideoMatchEnrichedVO,
    params: RankingParams,
  ): ScoreBreakdown {
    const alpha = params.alpha
    const codeBoost = item.configCode
      ? (params.boostsByCode?.[item.configCode] ?? getDefaultBoostForCode(item.configCode))
      : params.deconstructBoost
    const relevance = alpha * codeBoost * simNorm

    if (quality == null || !quality.hasData) {
      const relevanceOnly: ScoreBreakdown = {
        composite: relevance,
        simNorm,
        rovNorm: 0,
        boost: codeBoost,
        lowerBound,
        passesThreshold,
      }
      // 'shrink' has no prior mean to shrink towards here, so it differs from
      // 'group' only by not flagging the item.
      if (params.materialMissingStrategy === 'group') relevanceOnly.qualityMissing = true
      return relevanceOnly
    }

    const orZero = (v: unknown): number =>
      typeof v === 'number' && Number.isFinite(v) ? v : 0

    const weightedQuality =
      params.wCtr * orZero(quality.ctr) +
      params.wCvr * orZero(quality.cvr) +
      params.wRoi * orZero(quality.roi) +
      params.wOpenRate * orZero(quality.openRateScore) +
      params.wFissionRate * orZero(quality.viral)

    return {
      composite: relevance + (1 - alpha) * weightedQuality,
      simNorm,
      rovNorm: 0,
      boost: codeBoost,
      lowerBound,
      passesThreshold,
      weightedQuality,
    }
  }
  /**
   * Scores a VIDEO or ARTICLE item.
   *
   * The boost is resolved as `boostsByCode[configCode]`, then the dimension
   * default, or `deconstructBoost` when the item has no configCode. It
   * multiplies the relevance term only.
   *
   * ARTICLE: when `signals.quality` has data and all of read / open / fission
   * scores are present, the quality term is their weighted average (weights
   * `wRead` / `wOpen` / `wFission`, divided by their sum, or by 1 if the sum is
   * falsy). Otherwise the score is the relevance term alone.
   *
   * Other modalities: the quality term is ROV normalised into the
   * [`rovClipLow`, `rovClipHigh`] window. Without a finite ROV the score is the
   * relevance term alone.
   *
   * NOTE(review): ROV is read from `signals.rov` only. The original comment
   * mentioned compatibility with a legacy `videoDetail.rov` field, but no such
   * fallback exists in this function.
   *
   * @param simNorm         similarity normalised into [0, 1]
   * @param lowerBound      similarity threshold in effect, echoed into the result
   * @param passesThreshold whether the raw similarity reached the threshold
   * @param item            recall result being scored
   * @param params          ranking parameters
   * @returns the score breakdown
   */
  function rankVideoArticle(
    simNorm: number,
    lowerBound: number,
    passesThreshold: boolean,
    item: VideoMatchEnrichedVO,
    params: RankingParams,
  ): ScoreBreakdown {
    let codeBoost: number
    if (item.configCode) {
      codeBoost = params.boostsByCode?.[item.configCode] ?? getDefaultBoostForCode(item.configCode)
    } else {
      codeBoost = params.deconstructBoost
    }

    const relevance = params.alpha * codeBoost * simNorm
    const qualityShare = 1 - params.alpha

    if (item.modality === 'ARTICLE') {
      const qs = item.signals?.quality
      if (qs?.hasData && qs.readScore != null && qs.openScore != null && qs.fissionScore != null) {
        const weightSum = params.wRead + params.wOpen + params.wFission || 1
        const weighted =
          params.wRead * qs.readScore + params.wOpen * qs.openScore + params.wFission * qs.fissionScore
        const qualityScore = weighted / weightSum
        return {
          composite: relevance + qualityShare * qualityScore,
          simNorm,
          rovNorm: 0,
          boost: codeBoost,
          lowerBound,
          passesThreshold,
          weightedQuality: qualityScore,
        }
      }
      // Incomplete article quality: rank on relevance only.
      return { composite: relevance, simNorm, rovNorm: 0, boost: codeBoost, lowerBound, passesThreshold }
    }

    const rov = item.signals?.rov
    if (rov == null || !Number.isFinite(rov)) {
      return { composite: relevance, simNorm, rovNorm: 0, boost: codeBoost, lowerBound, passesThreshold }
    }

    const clipWidth = params.rovClipHigh - params.rovClipLow
    // An empty or inverted clip window yields a zero quality term.
    let rovNorm = 0
    if (clipWidth > 0) {
      rovNorm = clip01((rov - params.rovClipLow) / clipWidth)
    }
    return {
      composite: relevance + qualityShare * rovNorm,
      simNorm,
      rovNorm,
      boost: codeBoost,
      lowerBound,
      passesThreshold,
      weightedQuality: rovNorm,
    }
  }
  /** localStorage key under which the ranking parameters are persisted as JSON. */
  const STORAGE_KEY = 'vector_recall_ranking_params'
  /** localStorage key holding the schema version of the persisted parameters. */
  const RANKING_VERSION_KEY = 'vector_recall_ranking_params_version'
  /**
   * Current schema version. Data stored under an older version has its boosts
   * migrated on load (the dimension boost defaults changed between versions).
   */
  const RANKING_STORAGE_VERSION = 3
  /**
   * Boost values that older UI versions commonly wrote on their own (fallback
   * boost / bulk tweaks). During migration such a value is dropped unless it
   * equals the current default of its dimension.
   */
  const LEGACY_BOOST_SNAPSHOTS = new Set<number>([0.55, 0.6, 0.65, 1])
  /** Limits a boost to the [BOOST_MIN, BOOST_MAX] range; NaN is passed through unchanged. */
  function clampBoost(v: number): number {
    const atLeastMin = Math.max(BOOST_MIN, v)
    return Math.min(BOOST_MAX, atLeastMin)
  }
  /**
   * Cleans a stored per-code boost map during migration.
   *
   * Non-finite entries are dropped. An entry whose value is one of the legacy
   * snapshot values is dropped too, unless it equals the current default of its
   * dimension. Everything else is treated as a genuine user customisation and
   * kept, clamped into the allowed boost range.
   *
   * @param boosts stored map; may be absent
   * @returns a new map containing only the entries worth keeping
   */
  function sanitizeBoostsByCode(boosts: Record<string, number> | undefined): Record<string, number> {
    const kept: Record<string, number> = {}
    if (!boosts) return kept

    Object.entries(boosts).forEach(([code, value]) => {
      if (!Number.isFinite(value)) return
      const isLegacySnapshot = LEGACY_BOOST_SNAPSHOTS.has(value)
      if (isLegacySnapshot && value !== getDefaultBoostForCode(code)) return
      kept[code] = clampBoost(value)
    })
    return kept
  }
  /**
   * Migrates a stored fallback boost to the current schema.
   *
   * Anything that is not a finite number, and the old default of exactly 1.0,
   * is replaced by the current default; every other value is clamped into the
   * allowed boost range.
   *
   * @param v value read from storage
   * @returns the fallback boost to use
   */
  function migrateDeconstructBoost(v: unknown): number {
    if (typeof v === 'number' && Number.isFinite(v) && v !== 1.0) {
      return clampBoost(v)
    }
    return DEFAULT_RANKING_PARAMS.deconstructBoost
  }
  /**
   * Loads the ranking parameters persisted in localStorage.
   *
   * The stored JSON is treated as untrusted. The result starts from
   * `DEFAULT_RANKING_PARAMS` and takes over only fields that pass validation:
   * scalar parameters must be finite numbers, the per-code maps must be plain
   * objects (non-finite entries are dropped), and the missing-quality strategy
   * must be one of the two known values. Unknown or retired keys (`rovP5`,
   * `rovP95`, `wSim`, `wViral`, ...) are never copied into the result.
   *
   * Migrations:
   * - `rovP5` / `rovP95` seed `rovClipLow` / `rovClipHigh` when the new fields
   *   are absent.
   * - When the stored schema version is older than `RANKING_STORAGE_VERSION`,
   *   or cannot be parsed, boosts go through the legacy clean-up and the
   *   version key is updated.
   *
   * @returns the validated parameters, or `DEFAULT_RANKING_PARAMS` when nothing
   *          is stored, the stored value is not a JSON object, or storage / JSON
   *          parsing throws
   */
  function loadFromStorage(): RankingParams {
    const isFiniteNumber = (v: unknown): v is number =>
      typeof v === 'number' && Number.isFinite(v)

    // Copies the finite numeric entries of a plain object; anything else yields {}.
    const finiteNumberRecord = (v: unknown): Record<string, number> => {
      const out: Record<string, number> = {}
      if (typeof v !== 'object' || v === null || Array.isArray(v)) return out
      for (const [key, val] of Object.entries(v)) {
        if (isFiniteNumber(val)) out[key] = val
      }
      return out
    }

    const scalarKeys = [
      'simThreshold',
      'rovClipLow',
      'rovClipHigh',
      'alpha',
      'wCtr',
      'wCvr',
      'wRoi',
      'wOpenRate',
      'wFissionRate',
      'wRead',
      'wOpen',
      'wFission',
    ] as const

    try {
      const raw = localStorage.getItem(STORAGE_KEY)
      if (!raw) return DEFAULT_RANKING_PARAMS

      const decoded: unknown = JSON.parse(raw)
      if (typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) {
        return DEFAULT_RANKING_PARAMS
      }
      const parsed = decoded as Record<string, unknown>

      const storedVersion = Number(localStorage.getItem(RANKING_VERSION_KEY) || 0)
      // Written as a negated >= so that an unparsable (NaN) version also migrates.
      const needsBoostMigration = !(storedVersion >= RANKING_STORAGE_VERSION)

      // Legacy field names: rovP5 / rovP95 became rovClipLow / rovClipHigh.
      if (parsed.rovClipLow === undefined && typeof parsed.rovP5 === 'number') {
        parsed.rovClipLow = parsed.rovP5
      }
      if (parsed.rovClipHigh === undefined && typeof parsed.rovP95 === 'number') {
        parsed.rovClipHigh = parsed.rovP95
      }

      const storedBoosts = finiteNumberRecord(parsed.boostsByCode)
      const boostsByCode = needsBoostMigration ? sanitizeBoostsByCode(storedBoosts) : storedBoosts

      let deconstructBoost = DEFAULT_RANKING_PARAMS.deconstructBoost
      if (needsBoostMigration) {
        deconstructBoost = migrateDeconstructBoost(parsed.deconstructBoost)
      } else if (isFiniteNumber(parsed.deconstructBoost)) {
        deconstructBoost = clampBoost(parsed.deconstructBoost)
      }

      const params: RankingParams = {
        ...DEFAULT_RANKING_PARAMS,
        boostsByCode,
        deconstructBoost,
        simThresholdsByCode: finiteNumberRecord(parsed.simThresholdsByCode),
      }
      for (const key of scalarKeys) {
        const stored = parsed[key]
        if (isFiniteNumber(stored)) params[key] = stored
      }
      const strategy = parsed.materialMissingStrategy
      if (strategy === 'group' || strategy === 'shrink') {
        params.materialMissingStrategy = strategy
      }

      if (needsBoostMigration) {
        localStorage.setItem(RANKING_VERSION_KEY, String(RANKING_STORAGE_VERSION))
      }
      return params
    } catch {
      // Storage unavailable or JSON malformed: fall back to the defaults.
      return DEFAULT_RANKING_PARAMS
    }
  }
  /**
   * Persists the ranking parameters and the current schema version to
   * localStorage. Failures are deliberately ignored: the in-memory parameters
   * remain usable for the current session.
   *
   * @param p parameters to persist
   */
  function saveToStorage(p: RankingParams) {
    try {
      const entries: Array<[string, string]> = [
        [STORAGE_KEY, JSON.stringify(p)],
        [RANKING_VERSION_KEY, String(RANKING_STORAGE_VERSION)],
      ]
      for (const [key, value] of entries) {
        localStorage.setItem(key, value)
      }
    } catch {
      // Best effort only; see the function comment.
    }
  }
  /**
   * Returns a copy of `r` whose `boostsByCode` has an entry for every requested
   * code: codes that are not yet present receive their dimension default.
   * Neither `r` nor its boost map is mutated.
   *
   * @param r     ranking parameters to expand
   * @param codes configCodes that must be present in the result
   * @returns a new parameter object with the expanded boost map
   */
  export function expandRankingBoosts(r: RankingParams, codes: string[]): RankingParams {
    const boostsByCode = codes.reduce<Record<string, number>>(
      (acc, code) => {
        if (!(code in acc)) acc[code] = getDefaultBoostForCode(code)
        return acc
      },
      { ...r.boostsByCode },
    )
    return { ...r, boostsByCode }
  }
  /**
   * Builds the ranking parameters sent with a recall request: the boost map is
   * first expanded for the given codes, then reduced to the payload fields.
   *
   * @param r     current ranking parameters
   * @param codes configCodes taking part in the request
   * @returns the payload to submit
   */
  export function rankingForRequest(r: RankingParams, codes: string[]): RankingParams {
    const expanded = expandRankingBoosts(r, codes)
    return toRankingPayload(expanded)
  }
  /**
   * Projects ranking parameters onto exactly the fields submitted with a recall
   * request, dropping any extra property carried by `params`. Nullish per-code
   * maps are replaced by empty objects; existing maps are passed by reference.
   *
   * The original comment states the field set is aligned with the backend
   * `RankingSpec`.
   *
   * @param params ranking parameters, possibly carrying extra properties
   * @returns a new object with only the payload fields
   */
  export function toRankingPayload(params: RankingParams): RankingParams {
    const {
      simThreshold,
      simThresholdsByCode,
      rovClipLow,
      rovClipHigh,
      alpha,
      deconstructBoost,
      boostsByCode,
      wCtr,
      wCvr,
      wRoi,
      wOpenRate,
      wFissionRate,
      materialMissingStrategy,
      wRead,
      wOpen,
      wFission,
    } = params

    return {
      simThreshold,
      simThresholdsByCode: simThresholdsByCode ?? {},
      rovClipLow,
      rovClipHigh,
      alpha,
      deconstructBoost,
      boostsByCode: boostsByCode ?? {},
      wCtr,
      wCvr,
      wRoi,
      wOpenRate,
      wFissionRate,
      materialMissingStrategy,
      wRead,
      wOpen,
      wFission,
    }
  }
  /**
   * React hook exposing the persisted ranking parameters.
   *
   * The initial state is loaded from localStorage once, through the lazy state
   * initialiser. Every change of the state, including the initial mount, is
   * written back to localStorage.
   *
   * @returns the current parameters and a setter that replaces them
   */
  export function useRankingParams(): [RankingParams, (next: RankingParams) => void] {
    const [params, setParams] = useState<RankingParams>(loadFromStorage)

    useEffect(() => {
      saveToStorage(params)
    }, [params])

    return [params, setParams]
  }