3 miesięcy temu · 41e2f939ca
--- a/.gitignore
+++ b/.gitignore
@@ -60,6 +60,7 @@ output
 
				 
			
 
				 # Debug output
			
 
				 .trace/
			
 
				+coast_summary/
			
 
				 .trace_test/
			
 
				 .trace_test2/
			
 
				 examples/**/output*/
			
--- a/examples/content_finder/content_finder.md
+++ b/examples/content_finder/content_finder.md
@@ -35,7 +35,7 @@ $system$
 
				 - 执行日志在 OUTPUT_DIR 下；上下文不足时可结合日志与当次 `trace_id` 目录排查。  
			
 
				 
			
 
				 ## 工作方式与技能
			
 
				-整体建议按「理解需求 → 找内容 → 筛选 → 优质账号扩展 → 落盘 → 自检 → 入库 → 爬取计划」推进，但**不必**像流水线逐步打卡；可在中间自由推理、穿插工具。**细则与示例以技能为准**，请在执行中按需遵循：
			
 
				+整体建议按「理解需求 → 找内容 → 备选数量达到筛选标准 → 筛选 → 通过数量达标 → 优质账号扩展 → 落盘 → 自检 → 入库 → 爬取计划」推进，但**不必**像流水线逐步打卡；可在中间自由推理、穿插工具。**细则与示例以技能为准**，请在执行中按需遵循：
			
 
				 
			
 
				 | 技能 | 用途 |
			
 
				 |------|------|
			
@@ -54,12 +54,11 @@ $system$
 
				 ## 流程自检
			
 
				 **在宣称任务完成或结束对话前，须逐项满足下列要求；任一项未满足则继续执行，不得提前收尾。**
			
 
				 
			
 
				-### 1. 寻找是否覆盖策略输出词
			
 
				-需求阶段给出的 case 出发与特征出发搜索词是否都已用于寻找（在次数与分页限制内）。
			
 
				+### 1. 寻找到的备选内容达到 20+ 条后，可直接进入筛选阶段；若筛选后通过数量仍不满足要求，再尝试其他搜索词继续寻找。
			
 
				 
			
 
				 ### 2. 筛选与画像是否符合约定
			
 
				 - case 出发结果：先基础筛选与 case 规则筛选，**满 6 分**可直接进最终池，无需再拉画像。  
			
 
				-- 其余条目：是否已用 **`batch_fetch_portraits`** 做批量画像；`candidates_json` 中 `douyin_user_videos` 来源设 `try_account_fallback: false`，`douyin_search` / `douyin_search_tikhub` 来源设 `true`（默认）。  
			
 
				+- 其余条目：是否已用 **`batch_fetch_portraits`** 做批量画像；`candidates_json` 中 `douyin_user_videos` 来源设 `try_account_fallback: false`，`douyin_search` / `douyin_search_tikhub` 来源设 `true`（默认）。
			
 
				 - 搜索来源是否在批量结果中体现了账号兜底（当允许 fallback 时），而非在未尝试兜底的情况下把 `portrait_data.source` 设为 `none`；若工具侧均失败，须在 `reason` 中说明。  
			
 
				 
			
 
				 ### 除非没有在筛选阶段获取到作者画像，否则不能跳过**优质账号扩展**。
			
--- a/examples/content_finder/core.py
+++ b/examples/content_finder/core.py
@@ -95,9 +95,9 @@ from tools import (
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 # 默认搜索词
			
 
				-DEFAULT_QUERY = "民生"
			
 
				-DEFAULT_SUGGESTION = "民生"
			
 
				-DEFAULT_DEMAND_ID = 21050
			
 
				+DEFAULT_QUERY = "贪腐案例,赃物"
			
 
				+DEFAULT_SUGGESTION = "用户想了解贪腐案例中的涉案财物和金额"
			
 
				+DEFAULT_DEMAND_ID = 21820
			
 
				 
			
 
				 
			
 
				 def extract_assistant_text(message: Message) -> str:
			
--- a/examples/content_finder/db/store_results.py
+++ b/examples/content_finder/db/store_results.py
@@ -74,14 +74,13 @@ def upsert_good_authors(
 
				     if not accounts:
			
 
				         return 0
			
 
				 
			
 
				-    sql = """
			
 
				-    INSERT INTO demand_find_author (trace_id, author_name, author_link, elderly_ratio, elderly_tgi, remark, content_tags)
			
 
				-    VALUES (%s, %s, %s, %s, %s, %s, %s)
			
 
				-    ON DUPLICATE KEY UPDATE
			
 
				-      elderly_ratio = VALUES(elderly_ratio),
			
 
				-      elderly_tgi = VALUES(elderly_tgi),
			
 
				-      remark = VALUES(remark)
			
 
				+    insert_sql = """
			
 
				+    INSERT INTO demand_find_author (
			
 
				+      trace_id, channel, author_id, author_name, author_link, elderly_ratio, elderly_tgi, remark, content_tags
			
 
				+    )
			
 
				+    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
			
 
				     """
			
 
				+    exists_sql = "SELECT 1 FROM demand_find_author WHERE author_id = %s LIMIT 1"
			
 
				     with conn.cursor() as cur:
			
 
				         rows = 0
			
 
				         for acc in accounts:
			
@@ -96,17 +95,26 @@ def upsert_good_authors(
 
				             sec_uid = acc.get("author_sec_uid") or acc.get("sec_uid")
			
 
				             if not author_link and sec_uid:
			
 
				                 author_link = f"https://www.douyin.com/user/{sec_uid}"
			
 
				-            if not author_name or not author_link:
			
 
				+            author_id = str(sec_uid).strip() if sec_uid is not None else ""
			
 
				+            if not author_name or not author_link or not author_id:
			
 
				                 continue
			
 
				 
			
 
				             elderly_ratio = acc.get("age_50_plus_ratio") or ""
			
 
				             elderly_tgi = acc.get("age_50_plus_tgi") or ""
			
 
				             remark = acc.get("reason") or acc.get("remark") or ""
			
 
				             content_tags = _normalize_content_tags(acc.get("content_tags"))
			
 
				+
			
 
				+            # author_id 已存在时跳过，避免重复新增
			
 
				+            cur.execute(exists_sql, (author_id,))
			
 
				+            if cur.fetchone():
			
 
				+                continue
			
 
				+
			
 
				             cur.execute(
			
 
				-                sql,
			
 
				+                insert_sql,
			
 
				                 (
			
 
				                     trace_id,
			
 
				+                    "抖音",
			
 
				+                    author_id,
			
 
				                     author_name,
			
 
				                     author_link,
			
 
				                     str(elderly_ratio) if elderly_ratio is not None else None,