demand_quality.py 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745
  1. """需求质量判断:事件性、老年性 LLM 评分。
  2. 流程(串行两次 LLM,评分阶段互不截断):
  3. 1. 微信指数达标后,构建待评需求(特征点组合 + 单个有匹配元素 + 有匹配的短语)
  4. 2. 对全部待评需求执行事件性 LLM 评分(当前临时下线)
  5. 3. 对同一批全部待评需求执行老年性 LLM 评分
  6. 4. 导出表 / ODPS 写入时再过滤:标题保留(微信指数 + 灵感/目的点匹配)+ 老年性达标(事件性当前临时下线)
  7. """
  8. from __future__ import annotations
  9. import json
  10. import re
  11. import time
  12. from typing import Any
  13. from app.core.open_router_llm import OpenRouterCallError, create_chat_completion
  14. from app.hot_content.demand_export import ITEM_TYPE_ELEMENT, ITEM_TYPE_PHRASE
  15. from app.hot_content.exceptions import HotContentFlowError
# demand_type label used for feature-point candidates: each matched element
# text and the space-joined combination of all matched element texts.
TYPE_FEATURE_POINT = "特征点"
# demand_type label used for phrase candidates.
TYPE_PHRASE = "短语"
  18. def _normalize_demand_key(value: str) -> str:
  19. return "".join(str(value or "").split())
  20. def _dedupe_texts(texts: list[str]) -> list[str]:
  21. deduped: list[str] = []
  22. seen: set[str] = set()
  23. for raw in texts:
  24. text = str(raw).strip()
  25. if not text:
  26. continue
  27. keys = {text, _normalize_demand_key(text)}
  28. if keys & seen:
  29. continue
  30. seen.update(keys)
  31. deduped.append(text)
  32. return deduped
  33. def _has_matched_demand(row: dict[str, Any]) -> bool:
  34. return bool(str(row.get("matched_demand") or "").strip())
  35. def _record_wxindex_score(export_rows: list[dict[str, Any]]) -> float:
  36. scores: list[float] = []
  37. for row in export_rows:
  38. try:
  39. scores.append(float(row.get("wxindex_latest_score") or 0))
  40. except (TypeError, ValueError):
  41. continue
  42. return max(scores) if scores else 0.0
  43. def passes_wxindex_gate(
  44. export_rows: list[dict[str, Any]],
  45. *,
  46. wxindex_threshold: float,
  47. ) -> bool:
  48. """记录级微信指数是否达标,用于决定是否进入质量判断。"""
  49. return _record_wxindex_score(export_rows) >= wxindex_threshold
  50. def _repair_json_text(text: str) -> str:
  51. repaired = text.strip()
  52. repaired = re.sub(r",\s*([}\]])", r"\1", repaired)
  53. repaired = repaired.replace(""", '"').replace(""", '"')
  54. repaired = repaired.replace("'", "'").replace("'", "'")
  55. return repaired
  56. def _extract_score_demands_fallback(
  57. raw: str,
  58. candidates: list[dict[str, str]],
  59. ) -> dict[str, Any] | None:
  60. """标准 json.loads 失败时,按候选词宽松提取 score/items。"""
  61. items: list[dict[str, Any]] = []
  62. for candidate in candidates:
  63. demand_type = str(candidate.get("demand_type") or "").strip()
  64. demand_text = str(candidate.get("demand_text") or "").strip()
  65. if not demand_type or not demand_text:
  66. continue
  67. escaped_text = re.escape(demand_text)
  68. escaped_type = re.escape(demand_type)
  69. score_patterns = [
  70. (
  71. rf'"demand_type"\s*:\s*"{escaped_type}"\s*,\s*'
  72. rf'"demand_text"\s*:\s*"{escaped_text}"\s*,\s*'
  73. rf'"score"\s*:\s*([0-9]+(?:\.[0-9]+)?)'
  74. ),
  75. (
  76. rf'"demand_text"\s*:\s*"{escaped_text}"\s*,\s*'
  77. rf'"demand_type"\s*:\s*"{escaped_type}"\s*,\s*'
  78. rf'"score"\s*:\s*([0-9]+(?:\.[0-9]+)?)'
  79. ),
  80. (
  81. rf'"demand_text"\s*:\s*"{escaped_text}"'
  82. rf'[\s\S]{{0,400}}?"score"\s*:\s*([0-9]+(?:\.[0-9]+)?)'
  83. ),
  84. ]
  85. score_value: float | None = None
  86. for pattern in score_patterns:
  87. match = re.search(pattern, raw)
  88. if match:
  89. score_value = _normalize_score(match.group(1))
  90. break
  91. if score_value is None:
  92. continue
  93. reason = ""
  94. reason_match = re.search(
  95. rf'"demand_text"\s*:\s*"{escaped_text}"'
  96. rf'[\s\S]{{0,600}}?"reason"\s*:\s*"((?:[^"\\]|\\.)*)"',
  97. raw,
  98. )
  99. if reason_match:
  100. reason = (
  101. reason_match.group(1)
  102. .replace('\\"', '"')
  103. .replace("\\n", "\n")
  104. .replace("\\t", "\t")
  105. )
  106. items.append(
  107. {
  108. "demand_type": demand_type,
  109. "demand_text": demand_text,
  110. "score": score_value,
  111. "reason": reason,
  112. }
  113. )
  114. if not items:
  115. return None
  116. source_match = re.search(r'"source"\s*:\s*"((?:[^"\\]|\\.)*)"', raw)
  117. return {
  118. "source": source_match.group(1) if source_match else "",
  119. "items": items,
  120. }
  121. def _extract_json_object(
  122. text: str,
  123. *,
  124. candidates: list[dict[str, str]] | None = None,
  125. ) -> dict[str, Any]:
  126. raw = text.strip()
  127. if raw.startswith("```"):
  128. raw = re.sub(r"^```(?:json)?\s*", "", raw)
  129. raw = re.sub(r"\s*```$", "", raw)
  130. blocks = [raw]
  131. match = re.search(r"\{[\s\S]*\}", raw)
  132. if match:
  133. blocks.append(match.group(0))
  134. for block in blocks:
  135. for candidate_text in (block, _repair_json_text(block)):
  136. try:
  137. parsed = json.loads(candidate_text)
  138. if isinstance(parsed, dict):
  139. return parsed
  140. except json.JSONDecodeError:
  141. continue
  142. if candidates:
  143. for block in blocks:
  144. fallback = _extract_score_demands_fallback(block, candidates)
  145. if fallback:
  146. return fallback
  147. raise HotContentFlowError("llm output is not json object")
  148. def _candidate_key(demand_type: str, demand_text: str) -> tuple[str, str]:
  149. return demand_type.strip(), _normalize_demand_key(demand_text)
  150. def build_matched_element_texts(export_rows: list[dict[str, Any]]) -> list[str]:
  151. return _dedupe_texts(
  152. [
  153. str(row.get("item_text") or "").strip()
  154. for row in export_rows
  155. if str(row.get("item_type") or "") == ITEM_TYPE_ELEMENT
  156. and _has_matched_demand(row)
  157. ]
  158. )
  159. def build_feature_combo_text(export_rows: list[dict[str, Any]]) -> str:
  160. return " ".join(build_matched_element_texts(export_rows))
  161. def _append_feature_point_candidate(
  162. candidates: list[dict[str, str]],
  163. seen: set[tuple[str, str]],
  164. demand_text: str,
  165. ) -> None:
  166. text = str(demand_text or "").strip()
  167. if not text:
  168. return
  169. key = _candidate_key(TYPE_FEATURE_POINT, text)
  170. if key in seen:
  171. return
  172. seen.add(key)
  173. candidates.append(
  174. {
  175. "demand_type": TYPE_FEATURE_POINT,
  176. "demand_text": text,
  177. }
  178. )
  179. def build_quality_candidates(
  180. export_rows: list[dict[str, Any]],
  181. *,
  182. wxindex_threshold: float,
  183. ) -> list[dict[str, str]]:
  184. """微信指数达标时,构建特征点组合、单个元素与短语三类待评需求。"""
  185. if not passes_wxindex_gate(export_rows, wxindex_threshold=wxindex_threshold):
  186. return []
  187. candidates: list[dict[str, str]] = []
  188. seen: set[tuple[str, str]] = set()
  189. feature_combo = build_feature_combo_text(export_rows)
  190. _append_feature_point_candidate(candidates, seen, feature_combo)
  191. for element_text in build_matched_element_texts(export_rows):
  192. _append_feature_point_candidate(candidates, seen, element_text)
  193. for row in export_rows:
  194. if str(row.get("item_type") or "") != ITEM_TYPE_PHRASE:
  195. continue
  196. if not _has_matched_demand(row):
  197. continue
  198. phrase_text = str(row.get("item_text") or "").strip()
  199. if not phrase_text:
  200. continue
  201. key = _candidate_key(TYPE_PHRASE, phrase_text)
  202. if key in seen:
  203. continue
  204. seen.add(key)
  205. candidates.append(
  206. {
  207. "demand_type": TYPE_PHRASE,
  208. "demand_text": phrase_text,
  209. }
  210. )
  211. return candidates
  212. def _normalize_score(value: Any) -> float | None:
  213. try:
  214. score = float(value)
  215. except (TypeError, ValueError):
  216. return None
  217. if score < 0:
  218. return 0.0
  219. if score > 10:
  220. return 10.0
  221. return score
  222. def _build_score_lookup(result_json: dict[str, Any] | None) -> dict[tuple[str, str], dict[str, Any]]:
  223. lookup: dict[tuple[str, str], dict[str, Any]] = {}
  224. if not isinstance(result_json, dict):
  225. return lookup
  226. items = result_json.get("items") or []
  227. if not isinstance(items, list):
  228. return lookup
  229. for item in items:
  230. if not isinstance(item, dict):
  231. continue
  232. demand_type = str(item.get("demand_type") or "").strip()
  233. demand_text = str(item.get("demand_text") or "").strip()
  234. if not demand_type or not demand_text:
  235. continue
  236. lookup[_candidate_key(demand_type, demand_text)] = item
  237. return lookup
  238. def lookup_quality_scores(
  239. *,
  240. demand_type: str,
  241. demand_text: str,
  242. event_sense_json: dict[str, Any] | None,
  243. senior_fit_json: dict[str, Any] | None,
  244. ) -> tuple[float | None, float | None]:
  245. key = _candidate_key(demand_type, demand_text)
  246. event_item = _build_score_lookup(event_sense_json).get(key)
  247. senior_item = _build_score_lookup(senior_fit_json).get(key)
  248. event_score = _normalize_score(event_item.get("score")) if event_item else None
  249. senior_score = _normalize_score(senior_item.get("score")) if senior_item else None
  250. return event_score, senior_score
  251. def quality_passed(
  252. *,
  253. demand_type: str,
  254. demand_text: str,
  255. event_sense_json: dict[str, Any] | None,
  256. senior_fit_json: dict[str, Any] | None,
  257. event_threshold: float,
  258. senior_threshold: float,
  259. ) -> bool:
  260. event_score, senior_score = lookup_quality_scores(
  261. demand_type=demand_type,
  262. demand_text=demand_text,
  263. event_sense_json=event_sense_json,
  264. senior_fit_json=senior_fit_json,
  265. )
  266. # TODO: 事件性判断临时下线,恢复时取消下方注释并删除老年性单判逻辑
  267. # if event_score is None or senior_score is None:
  268. # return False
  269. # return event_score >= event_threshold and senior_score >= senior_threshold
  270. if senior_score is None:
  271. return False
  272. return senior_score >= senior_threshold
  273. def attach_quality_scores_to_export_rows(
  274. export_rows: list[dict[str, Any]],
  275. *,
  276. event_sense_json: dict[str, Any] | None,
  277. senior_fit_json: dict[str, Any] | None,
  278. ) -> list[dict[str, Any]]:
  279. rows: list[dict[str, Any]] = []
  280. for row in export_rows:
  281. item_type = str(row.get("item_type") or "")
  282. item_text = str(row.get("item_text") or "").strip()
  283. if item_type == ITEM_TYPE_ELEMENT and item_text and _has_matched_demand(row):
  284. event_score, senior_score = lookup_quality_scores(
  285. demand_type=TYPE_FEATURE_POINT,
  286. demand_text=item_text,
  287. event_sense_json=event_sense_json,
  288. senior_fit_json=senior_fit_json,
  289. )
  290. elif item_type == ITEM_TYPE_PHRASE and item_text:
  291. event_score, senior_score = lookup_quality_scores(
  292. demand_type=TYPE_PHRASE,
  293. demand_text=item_text,
  294. event_sense_json=event_sense_json,
  295. senior_fit_json=senior_fit_json,
  296. )
  297. else:
  298. event_score, senior_score = None, None
  299. rows.append(
  300. {
  301. **row,
  302. "event_sense_score": event_score,
  303. "senior_fit_score": senior_score,
  304. }
  305. )
  306. return rows
  307. def _normalize_llm_items(
  308. parsed: dict[str, Any],
  309. candidates: list[dict[str, str]],
  310. ) -> list[dict[str, Any]]:
  311. candidate_lookup = {
  312. _candidate_key(item["demand_type"], item["demand_text"]): item
  313. for item in candidates
  314. }
  315. raw_items = parsed.get("items") or []
  316. if not isinstance(raw_items, list):
  317. raw_items = []
  318. items: list[dict[str, Any]] = []
  319. seen: set[tuple[str, str]] = set()
  320. for raw in raw_items:
  321. if not isinstance(raw, dict):
  322. continue
  323. demand_type = str(raw.get("demand_type") or "").strip()
  324. demand_text = str(raw.get("demand_text") or "").strip()
  325. if not demand_type or not demand_text:
  326. continue
  327. key = _candidate_key(demand_type, demand_text)
  328. if key not in candidate_lookup or key in seen:
  329. continue
  330. seen.add(key)
  331. score = _normalize_score(raw.get("score"))
  332. if score is None:
  333. continue
  334. items.append(
  335. {
  336. "demand_type": demand_type,
  337. "demand_text": demand_text,
  338. "score": score,
  339. "reason": str(raw.get("reason") or "").strip(),
  340. }
  341. )
  342. return items
  343. def _llm_score_demands(
  344. *,
  345. channel_content_id: str,
  346. candidates: list[dict[str, str]],
  347. system_prompt: str,
  348. model: str,
  349. max_attempts: int,
  350. retry_sleep_seconds: float,
  351. max_tokens: int,
  352. score_field: str,
  353. ) -> dict[str, Any]:
  354. if not candidates:
  355. return {"source": channel_content_id, "items": []}
  356. user_payload = {
  357. "source": channel_content_id,
  358. "demands": candidates,
  359. "output_schema": {
  360. "source": "string",
  361. "items": [
  362. {
  363. "demand_type": "string, 特征点 or 短语",
  364. "demand_text": "string, must match one demand in demands",
  365. "score": "number, 0-10",
  366. "reason": "string",
  367. }
  368. ],
  369. },
  370. "constraints": [
  371. "仅对给定 demands 逐项评分,不得新增或遗漏",
  372. "score 为 0-10 的数字,越大表示越符合判断标准",
  373. "demand_type 与 demand_text 必须与输入完全一致",
  374. "reason 字段请用中文表述,不要使用英文双引号 \"",
  375. "仅输出 JSON 对象,不要 markdown 代码块",
  376. ],
  377. }
  378. last_error: Exception | None = None
  379. for attempt in range(1, max(max_attempts, 1) + 1):
  380. try:
  381. resp = create_chat_completion(
  382. [
  383. {"role": "system", "content": system_prompt},
  384. {
  385. "role": "user",
  386. "content": json.dumps(user_payload, ensure_ascii=False),
  387. },
  388. ],
  389. model=model or None,
  390. temperature=0,
  391. max_tokens=max(max_tokens, 1),
  392. )
  393. parsed = _extract_json_object(
  394. str(resp.get("content") or ""),
  395. candidates=candidates,
  396. )
  397. parsed.setdefault("source", channel_content_id)
  398. items = _normalize_llm_items(parsed, candidates)
  399. return {
  400. "source": channel_content_id,
  401. "score_field": score_field,
  402. "items": items,
  403. }
  404. except (OpenRouterCallError, HotContentFlowError) as exc:
  405. last_error = exc
  406. if attempt < max(max_attempts, 1):
  407. time.sleep(max(retry_sleep_seconds, 0))
  408. raise HotContentFlowError(
  409. f"llm {score_field} scoring failed for {channel_content_id}: {last_error}"
  410. ) from last_error
  411. def llm_score_event_sense(
  412. *,
  413. channel_content_id: str,
  414. candidates: list[dict[str, str]],
  415. model: str,
  416. max_attempts: int,
  417. retry_sleep_seconds: float,
  418. max_tokens: int,
  419. ) -> dict[str, Any]:
  420. system_prompt = """
  421. 你是一个事件表达精确度评估专家。
  422. # 任务
  423. 我会提供若干短语或词组组合(可以是特征词的拼接)。
  424. 请逐项判断:该短语/词组能否准确表达出一个具体的事件。
  425. 表达越确切、事件越具体,得分越高。
  426. # 评分标准(0-10)
  427. 9-10:
  428. 精准指向某一具体事件,无歧义,可直接还原事件内容
  429. 7-8:
  430. 大体可判断是某类事件,但存在少量歧义或信息不完整
  431. 4-6:
  432. 有一定事件指向,但过于泛化,无法锁定具体事件
  433. 1-3:
  434. 偏属性/概念描述,几乎无法对应具体事件
  435. 0:
  436. 完全无法表达任何具体事件
  437. # 评估维度(综合考量)
  438. - 主体明确性:是否点出了事件涉及的人/物/组织
  439. - 动作/结果明确性:是否体现了发生了什么
  440. - 时空限定性:是否暗示了特定时间或地点
  441. - 可还原性:仅凭该短语,能否在脑中重建出事件场景
  442. # 输出格式
  443. 严格输出 JSON,禁止输出任何其他内容。
  444. """
  445. return _llm_score_demands(
  446. channel_content_id=channel_content_id,
  447. candidates=candidates,
  448. system_prompt=system_prompt,
  449. model=model,
  450. max_attempts=max_attempts,
  451. retry_sleep_seconds=retry_sleep_seconds,
  452. max_tokens=max_tokens,
  453. score_field="event_sense",
  454. )
  455. def llm_score_senior_fit(
  456. *,
  457. channel_content_id: str,
  458. candidates: list[dict[str, str]],
  459. model: str,
  460. max_attempts: int,
  461. retry_sleep_seconds: float,
  462. max_tokens: int,
  463. ) -> dict[str, Any]:
  464. system_prompt = """
  465. # 角色
  466. 你是一名严格的中老年内容适老性评分专家,专门评估词组/短语对中国50岁以上中老年用户的吸引力与相关性。你的判断基于严格的用户画像,而非主观感受。你会识别并拒绝一切看似"老年"实则属于年轻群体、中产焦虑、高认知门槛或语义模糊的伪适老词组。
  467. # 核心任务
  468. 对输入的每个词组/短语,输出一个0-10的适老性评分,并给出简短判断依据。
  469. ---
  470. # 一、基础定义(严格遵守,不可修改)
  471. ## 用户画像:中国50岁以上中老年人
  472. ### 认知特点
  473. - 追求"确定性"和"安全感",偏好简单直白,拒绝烧脑与推理
  474. - 不关注新事物、抽象宏观经济、复杂金融博弈、枯燥行政程序
  475. - 对网络梗、亚文化、职场黑话不敏感甚至反感
  476. ### 文化背景
  477. - 成长于上世纪50-70年代,传统观念根深蒂固
  478. - 深受儒家文化影响,强烈的孝道观念与集体主义倾向
  479. - 处于"安享期"而非"奋斗期"
  480. - 关注"保命"(三高/心脏/防骗)而非"塑形"(减肥/发际线)
  481. - 关注"存量财产安全"而非"增量资产博弈"
  482. ### 情感需求
  483. - 核心情感:安逸、从容、被尊重
  484. - 偏好:正能量、民族自豪感、家庭温情、传统文化、同龄人故事
  485. - 反感:贩卖焦虑、激烈矛盾冲突、血腥暴力、悲惨负面内容
  486. ### 场景偏好
  487. - 接受:菜市场、公园、家庭、医院、老友聚会、怀旧场景
  488. - 排斥:写字楼、夜店、高端消费场所、极限运动
  489. ---
  490. # 二、高优品类强制评分白名单(最低分不得低于7分)
  491. 以下品类为业务侧已确认的高优内容,评分最低保底7分。
  492. 原黑名单中的"博弈策略""认知门槛高""泛人群"等扣分信号对本表品类完全失效。
  493. ## 满分区间(9-10分)
  494. - 健康养生·老年健康(含老年专属标注时满分)
  495. - 生活技巧·安全防护·反诈防骗
  496. - 公共管理·医疗卫生·医保报销
  497. - 公共管理·补贴福利·老年群体补贴
  498. - 爱国情感·民族情感
  499. - 政治事件·领袖纪念
  500. - 民生政策·惠民政策(明确老年受益时满分,泛人群降为8)
  501. ## 高分区间(8-9分)
  502. - 公共管理·治理监督·反腐
  503. - 国家实力·国际地位
  504. - 政策制度·国家统一
  505. - 外交事件·外交访问
  506. - 处世智慧·生存策略·经验总结
  507. - 处世智慧·生存策略·生活指导
  508. ## 中高分区间(7-8分)
  509. - 处世智慧·价值取向·处世哲学
  510. - 民生政策·免费福利政策
  511. - 公共管理·补贴福利·生活服务补贴
  512. - 文化概念·文化传承
  513. - 时政评议·社会评议·社会公正
  514. - 军事谋略·战略运筹·战略方案
  515. - 社会问题·国家安全事件
  516. - 国际政治·外交立场
  517. - 国际政治·双边关系(中美关系等)
  518. - 时政评议·国际关系·中美关系
  519. - 社会问题·国际问题·两岸议题
  520. - 地标景观·交通枢纽
  521. - 花卉风格·高饱和花卉
  522. ## 保底7分(区间锁定,不上浮)
  523. - 外交事件·博弈手段
  524. - 政治运作·政治博弈
  525. - 军事安全·能源安全
  526. - 公共管理·政策法规·行业规则
  527. - 社会问题·经济形势·农村农业
  528. - 民生生活·教育议题
  529. ---
  530. # 三、高优品类内部评分细则
  531. ## 细则1:叙事方式决定区间上限
  532. 同一高优品类,叙事方式决定最终得分位置:
  533. - 叙事/成就型(XX圆满完成、我国XX取得胜利)→ 取区间上限
  534. - 评议/立场型(分析XX走向、表达XX立场)→ 取区间中位
  535. - 策略/博弈型(对抗手段、如何反制)→ 取区间下限(最低7分)
  536. ## 细则2:老年专属加成
  537. 品类内容含"老年""中老年""50岁以上"等明确专属信号 → 区间内+1分(不超过10分)
  538. ## 细则3:焦虑化叙事微降(高优品类内仍适用)
  539. 使用"危机""崩溃""末日""警惕"等词渲染负面不确定性 → 区间内-1分(不低于7分保底)
  540. ---
  541. # 四、低分黑名单信号
  542. ## 强制低分信号(命中任一,评分不超过3)
  543. - 职场类:升职、副业、内卷、打工人、绩效、裁员
  544. - 年轻文化:网络梗、二次元、潮流、追星、发际线、颜值
  545. - 金融投资:炒股、基金、加密货币、理财产品、资产配置
  546. - 房产相关:买房、贷款、学区房、房价涨跌
  547. - 健身塑形:减肥、健身、马甲线、体脂率、增肌
  548. - 科技数码:手机评测、AI工具、电脑配置、游戏硬件
  549. - 高消费场景:奢侈品、出境游、米其林、高端健身房
  550. - 情绪贩卖:焦虑、内耗、emo、迷茫、躺平
  551. - 模糊悬念:无具体信息的"千万别做这件事"类表达
  552. ## 中度扣分信号(命中使评分下浮1-2分)
  553. - 内容偏泛人群,缺乏老年专属场景(如免费福利政策未明确针对老年人)
  554. - 认知门槛较高,需要背景知识才能理解(如军事专业术语密集)
  555. - 表达方式年轻化,但内容本身不排斥老年人
  556. - **焦虑化叙事**:即使话题本身适老,若使用"危机""崩溃""警惕""崩盘"等词渲染不确定性,触发此扣分信号——老年用户偏好"确定性"叙事,排斥焦虑化包装
  557. - 农村农业/教育议题:属泛人群内容,老年专属性弱,基础降至2-4分
  558. ---
  559. # 五、评分标准(0-10)
  560. - 9-10:高度契合中老年用户核心关注点、老年专属场景或强情感诉求(防骗、老年健康、养老金、民族自豪)
  561. - 7-8:对中老年用户有较强吸引力或实用价值,场景清晰(处世智慧、传统文化、家庭亲情、叙事型时政)
  562. - 5-6:有一定相关性,但中老年专属属性一般,泛人群居多(评议型时政、泛人群惠民、文化传承)
  563. - 3-4:偏年轻群体或认知门槛偏高,老年性弱
  564. - 1-2:明显面向年轻群体,中老年用户几乎不感兴趣
  565. - 0:与中老年用户完全无关,或存在强烈排斥信号
  566. ---
  567. # 五、输出规则
  568. 严格输出 JSON 对象(含 items 数组),禁止输出 JSON 之外的任何内容(无前缀、无解释、无markdown格式)。
  569. reason 字段请用中文表述,不要使用英文双引号 "。
  570. """
  571. return _llm_score_demands(
  572. channel_content_id=channel_content_id,
  573. candidates=candidates,
  574. system_prompt=system_prompt,
  575. model=model,
  576. max_attempts=max_attempts,
  577. retry_sleep_seconds=retry_sleep_seconds,
  578. max_tokens=max_tokens,
  579. score_field="senior_fit",
  580. )
  581. def filter_candidates_by_event_sense(
  582. candidates: list[dict[str, str]],
  583. event_sense_json: dict[str, Any],
  584. *,
  585. event_threshold: float,
  586. ) -> list[dict[str, str]]:
  587. lookup = _build_score_lookup(event_sense_json)
  588. passed: list[dict[str, str]] = []
  589. for candidate in candidates:
  590. key = _candidate_key(candidate["demand_type"], candidate["demand_text"])
  591. item = lookup.get(key)
  592. score = _normalize_score(item.get("score")) if item else None
  593. if score is not None and score >= event_threshold:
  594. passed.append(candidate)
  595. return passed
  596. def run_demand_quality_pipeline(
  597. *,
  598. channel_content_id: str,
  599. export_rows: list[dict[str, Any]],
  600. wxindex_threshold: float,
  601. event_threshold: float,
  602. senior_threshold: float,
  603. model: str,
  604. max_attempts: int,
  605. retry_sleep_seconds: float,
  606. max_tokens: int,
  607. ) -> tuple[dict[str, Any], dict[str, Any]]:
  608. """微信指数达标的需求:串行执行事件性、老年性 LLM,均对全量候选评分。"""
  609. candidates = build_quality_candidates(
  610. export_rows,
  611. wxindex_threshold=wxindex_threshold,
  612. )
  613. if not candidates:
  614. return {"source": channel_content_id, "items": []}, {"source": channel_content_id, "items": []}
  615. llm_kwargs = {
  616. "channel_content_id": channel_content_id,
  617. "candidates": candidates,
  618. "model": model,
  619. "max_attempts": max_attempts,
  620. "retry_sleep_seconds": retry_sleep_seconds,
  621. "max_tokens": max_tokens,
  622. }
  623. # TODO: 事件性判断临时下线,恢复时取消下方注释并删除 stub 返回
  624. # event_sense_json = llm_score_event_sense(**llm_kwargs)
  625. # event_sense_json["threshold"] = event_threshold
  626. event_sense_json = {
  627. "source": channel_content_id,
  628. "items": [],
  629. "threshold": event_threshold,
  630. }
  631. senior_fit_json = llm_score_senior_fit(**llm_kwargs)
  632. senior_fit_json["threshold"] = senior_threshold
  633. return event_sense_json, senior_fit_json