|
@@ -0,0 +1,946 @@
|
|
|
|
|
+"""
|
|
|
|
|
+HOW 解构 V9 - 点依赖关系处理
|
|
|
|
|
+
|
|
|
|
|
+V9 新特性:
|
|
|
|
|
+- 正确处理点的依赖关系:
|
|
|
|
|
+ - 灵感点 ↔ 目的点(双向互推)
|
|
|
|
|
+ - 灵感点,目的点 → 关键点(单向推导)
|
|
|
|
|
+ - 同类型点不能互推
|
|
|
|
|
+
|
|
|
|
|
+- 提取所有三类点(即使只分析灵感点)
|
|
|
|
|
+- 根据点类型动态提供可推导来源
|
|
|
|
|
+- 支持机器可模拟的推测路径
|
|
|
|
|
+
|
|
|
|
|
+输入输出结构:
|
|
|
|
|
+- 帖子信息:examples_new/{账号}/作者历史帖子/{帖子ID}.json
|
|
|
|
|
+- what解构结果:examples_new/{账号}/output/{帖子ID}_{运行日期}_{运行时间}.json
|
|
|
|
|
+- 输出结果:examples_new/{账号}/how_output/{帖子ID}_{运行日期}_{运行时间}.json
|
|
|
|
|
+"""
|
|
|
|
|
+
|
|
|
|
|
+import asyncio
|
|
|
|
|
+import json
|
|
|
|
|
+import os
|
|
|
|
|
+from typing import Dict, List
|
|
|
|
|
+from datetime import datetime
|
|
|
|
|
+
|
|
|
|
|
+from agents import Agent, Runner, trace
|
|
|
|
|
+from agents.tracing.create import custom_span
|
|
|
|
|
+from lib.my_trace import set_trace
|
|
|
|
|
+from lib.utils import read_json
|
|
|
|
|
+from lib.client import get_model
|
|
|
|
|
+
|
|
|
|
|
# Model identifier shared by every agent defined in this pipeline.
MODEL_NAME = "google/gemini-2.5-flash"
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# 多模态消息构建
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
def build_post_multimodal_content(post_data: Dict) -> List[Dict]:
    """Build the multimodal message parts for a single post.

    Produces an optional image-gallery header plus one ``input_image`` part
    per image URL, followed by a single ``input_text`` part that carries the
    post's structured metadata (title, body, publish time, engagement).
    """
    parts: List[Dict] = []
    image_urls = post_data.get('images', [])

    if image_urls:
        # Gallery header first, so the model knows the first image is the cover.
        parts.append({
            "type": "input_text",
            "text": f"[帖子图集:{len(image_urls)}张图片,第一张是封面]"
        })
        parts.extend(
            {"type": "input_image", "detail": "auto", "image_url": url}
            for url in image_urls
        )

    meta_text = f"""
<标题>
{post_data.get('title', '')}
</标题>

<正文>
{post_data.get('body_text', '')}
</正文>

<发布时间>
{post_data.get('publish_time', '')}
</发布时间>

<互动数据>
点赞: {post_data.get('like_count', 0)}, 收藏: {post_data.get('collect_count', 0)}
</互动数据>
"""
    parts.append({"type": "input_text", "text": meta_text.strip()})

    return parts
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# Step 1: 来源类型初筛 Agent
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
# Step 1 system prompt: broad-recall source screening for a single point.
# The agent must rate every source type (A/B/C/D), justify the rating and
# sketch a 3-5 step, machine-simulatable derivation path using only the three
# allowed operations (internal search, external search, info processing).
# Output contract: a single ```json``` block parsed later by extract_json().
STEP1_PROMPT = """
你是一个创作溯源分析专家。

你的任务:对给定的灵感点,**广召回**所有可能的来源,并给出初步推测路径。

## 关键要求

1. **广召回**:对每个来源类型(A/B/C/D)都要分析,即使可能性很低也要列出
2. **可能性评估**:给出高/中/低的评级
3. **初步推测路径**:用3-5步描述从原始点到灵感点的推导过程

## 推测路径的要求

### 原始点
- **A类来源**:原始点是上下文中"可推导来源(A类来源)"提供的其他类型的点
- **B类来源**:原始点是博主历史帖子数据
- **C类来源**:原始点是外部平台信息(小红书/微博/知乎等)
- **D类来源**:原始点是混合的(多种来源组合)

### 可用操作类型
只能使用这三种操作:
1. **从内搜**:搜索/浏览博主历史帖子、回忆过往经验
2. **从外搜**:搜索外部平台、浏览热点话题、查询知识
3. **信息处理**:观察、对比、提取、归纳、联想、组合、类比

### 步骤格式
每步必须明确:操作类型 + 具体做什么 + 输出什么

格式:`步骤X [操作类型]: 具体操作 → 输出结果`

### 注意事项
- **不能跳步骤**:关键词、概念的来源必须说清楚
- **不能有黑盒**:不能用"突然想到"、"产生灵感"等说法
- **数字世界操作**:只能操作数字化的数据,不能有物理世界交互
- **合适的粒度**:3-5步说清楚,不要太细(不说底层实现),不要太粗(不能黑盒)

## 来源类型分类

**A. 从其他点推导**
- 从上下文中"可推导来源(A类来源)"部分提供的点推导
- 根据点依赖关系:
  - 灵感点可从目的点推导
  - 目的点可从灵感点推导
  - 关键点可从灵感点和目的点推导
- 如果上下文说明"没有可用于推导的其他类型的点",则A类可能性为"无"

**B. 从博主账号历史**
- 从历史帖子中的内容、风格、经验推导

**C. 从外部信息**
- 从平台热点、流行梗、社会现象推导

**D. 混合输入**
- 由多个来源融合创新

## 输出格式

**注意**:
- 如果上下文说明没有可推导来源,则A类来源可能性为"无",理由说明原因
- 对所有A/B/C/D来源都要分析,即使可能性很低

```json
{
  "可能的来源": {
    "A_其他点推导": {
      "可能性": "高/中/低/无",
      "理由": "为什么这个来源是可能的(1-2句话),如果可能性为'无',说明为什么没有可推导来源",
      "初步推测路径": [
        "步骤1 [操作类型]: 具体操作 → 输出结果",
        "步骤2 [操作类型]: 具体操作 → 输出结果",
        "步骤3 [操作类型]: 具体操作 → 输出结果"
      ]
    },
    "B_博主历史": {
      "可能性": "高/中/低",
      "理由": "为什么这个来源是可能的(1-2句话)",
      "初步推测路径": [
        "原始点: 博主历史帖子数据",
        "步骤1 [从内搜]: 具体操作 → 输出结果",
        "步骤2 [信息处理]: 具体操作 → 输出结果",
        "步骤3 [信息处理]: 具体操作 → 输出结果"
      ]
    },
    "C_外部信息": {
      "可能性": "高/中/低",
      "理由": "为什么这个来源是可能的(1-2句话)",
      "初步推测路径": [
        "原始点: 外部平台信息",
        "步骤1 [从外搜]: 具体操作 → 输出结果",
        "步骤2 [信息处理]: 具体操作 → 输出结果",
        "步骤3 [信息处理]: 具体操作 → 输出结果"
      ]
    },
    "D_混合输入": {
      "可能性": "高/中/低",
      "理由": "可能混合了哪些来源",
      "初步推测路径": [
        "原始点: 混合(历史数据+外部信息)",
        "步骤1 [操作类型]: 具体操作 → 输出结果",
        "步骤2 [操作类型]: 具体操作 → 输出结果",
        "步骤3 [操作类型]: 具体操作 → 输出结果"
      ]
    }
  }
}
```
"""

# Agent performing the Step-1 source-type screening; no tools — it only
# reasons over the multimodal context assembled in analyze_point_step_by_step.
step1_agent = Agent(
    name="Source Type Filter",
    instructions=STEP1_PROMPT,
    model=get_model(MODEL_NAME),
    tools=[],
)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# Step 2: 深入分析 Agent
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
# Step 2.B system prompt: deconstruct HOW the inspiration point was derived
# from the blogger's own history, with an explicit input → process → output
# structure. Output contract: one ```json``` block.
# NOTE(review): Step 2 is currently not invoked (Steps 2-4 are disabled in
# analyze_point_step_by_step) — the agents are kept for re-enablement.
STEP2_B_PROMPT = """
你是一个创作溯源分析专家。

你的任务:解构从博主历史如何一步步得到这个灵感点。

## 核心要求:明确标识 输入 → 处理 → 输出

### 输入
- 具体是博主历史中的哪个/哪些帖子?
- 这些帖子里有什么内容?(图片/文字/主题)

### 处理过程(一步步推导)
- 步骤1:创作者观察/接收到什么信息?
- 步骤2:产生了什么联想/思考?
- 步骤3:如何转化为具体的灵感?
- (可以有更多步骤)

### 输出
- 最终得到的灵感点

## 输出要求

输出JSON格式:
```json
{
  "输入_博主历史帖子": {
    "相关帖子": [
      {
        "帖子序号": "历史帖子X/总数",
        "标题": "...",
        "关键内容": "具体是图片中什么/文字里什么"
      }
    ]
  },
  "处理_从输入到灵感的推导": {
    "步骤1": {
      "动作": "观察/接收",
      "内容": "创作者看到/注意到了什么"
    },
    "步骤2": {
      "动作": "联想/思考",
      "内容": "产生了什么想法/联系"
    },
    "步骤3": {
      "动作": "转化/形成",
      "内容": "如何变成具体的灵感"
    }
  },
  "输出_最终灵感": "灵感点名称"
}
```
"""

# Step 2.C system prompt: same input → process → output deconstruction, but
# for external-information (C-type) sources; uses four processing steps.
STEP2_C_PROMPT = """
你是一个创作溯源分析专家。

你的任务:解构从外部信息如何一步步得到这个灵感点。

## 核心要求:明确标识 输入 → 处理 → 输出

### 输入
- 具体是什么外部信息?(热点话题/流行梗/社会现象)
- 这些信息的具体内容是什么?

### 处理过程(一步步推导)
- 步骤1:创作者接触到什么外部信息?
- 步骤2:如何理解/解读这个信息?
- 步骤3:如何与自己的内容结合?
- 步骤4:如何转化为具体的灵感?
- (可以有更多步骤)

### 输出
- 最终得到的灵感点

## 输出要求

输出JSON格式:
```json
{
  "输入_外部信息": {
    "信息类型": "平台热点/流行梗/社会现象",
    "具体内容": "是什么话题/梗/现象",
    "信息来源": "在哪里看到/了解到"
  },
  "处理_从输入到灵感的推导": {
    "步骤1": {
      "动作": "接触/了解",
      "内容": "创作者看到/听到了什么"
    },
    "步骤2": {
      "动作": "理解/解读",
      "内容": "如何理解这个信息"
    },
    "步骤3": {
      "动作": "结合/融合",
      "内容": "如何与自己的内容结合"
    },
    "步骤4": {
      "动作": "转化/形成",
      "内容": "如何变成具体的灵感"
    }
  },
  "输出_最终灵感": "灵感点名称"
}
```
"""

# Agent for Step 2.B (blogger-history derivation).
step2_b_agent = Agent(
    name="Blogger History Analyzer",
    instructions=STEP2_B_PROMPT,
    model=get_model(MODEL_NAME),
    tools=[],
)

# Agent for Step 2.C (external-information derivation).
step2_c_agent = Agent(
    name="External Info Analyzer",
    instructions=STEP2_C_PROMPT,
    model=get_model(MODEL_NAME),
    tools=[],
)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# Step 3: 路径验证 Agent
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
# Step 3 system prompt: validate one candidate source path — supporting
# evidence, counter-arguments and a 1-10 plausibility score.
# NOTE(review): currently unused (Steps 2-4 disabled); kept for re-enablement.
STEP3_PROMPT = """
你是一个创作溯源分析专家。

你的任务:对每个来源路径进行验证和评分。

## 验证维度

1. **支持证据**(3-5条具体证据)
2. **反驳点**(如果有不支持的因素)
3. **可能性评分**(1-10分,基于证据强度)

## 输出要求

输出JSON格式:
```json
{
  "来源类型": "B",
  "支持证据": [
    "证据1: ...",
    "证据2: ...",
    "证据3: ..."
  ],
  "反驳点": [
    "反驳1: ..."
  ],
  "可能性评分": 8,
  "评分说明": "为什么给这个分数"
}
```
"""

# Agent for Step 3 path validation.
step3_agent = Agent(
    name="Path Validator",
    instructions=STEP3_PROMPT,
    model=get_model(MODEL_NAME),
    tools=[],
)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# Step 4: 综合结论 Agent
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
# Step 4 system prompt: synthesize all previous steps into a final verdict —
# most likely source path, per-source contribution, overall confidence.
# NOTE(review): currently unused (Steps 2-4 disabled); kept for re-enablement.
STEP4_PROMPT = """
你是一个创作溯源分析专家。

你的任务:基于前面的分析,给出综合结论。

## 输出要求

输出JSON格式:
```json
{
  "最可能的来源路径": "...",
  "各来源的占比": {
    "B_博主历史": "60%",
    "C_外部信息": "40%"
  },
  "完整推导路径": "从...到...最终形成...",
  "关键转折点": "...",
  "整体置信度": 85
}
```
"""

# Agent for the Step-4 synthesis.
step4_agent = Agent(
    name="Conclusion Synthesizer",
    instructions=STEP4_PROMPT,
    model=get_model(MODEL_NAME),
    tools=[],
)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# 从新格式的what结果中提取所有点
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
def extract_all_points_v9(what_result: Dict) -> Dict[str, List[Dict]]:
    """Extract all three point types from the new-format WHAT result.

    V9: points come from 选题理解.explicit_elements and carry only their
    names — descriptions etc. are WHAT-stage intermediates that would bias
    the HOW deconstruction.

    Returns a mapping::

        {
            '灵感点': [{'type': ..., 'id': '灵感点1', 'name': ...}, ...],
            '目的点': [...],   # at most one entry (single string in WHAT)
            '关键点': [...],
        }
    """
    elements = what_result.get('选题理解', {}).get('explicit_elements', {})

    def _points_of(kind: str, names: List[str]) -> List[Dict]:
        # Assign sequential 1-based ids like '灵感点1', '灵感点2', ...
        return [
            {'type': kind, 'id': f'{kind}{seq}', 'name': point_name}
            for seq, point_name in enumerate(names, 1)
        ]

    # 目的点 is a single string in the WHAT output; wrap it iff non-empty.
    purpose = elements.get('目的点', '')

    return {
        '灵感点': _points_of('灵感点', elements.get('灵感点列表', [])),
        '目的点': _points_of('目的点', [purpose] if purpose else []),
        '关键点': _points_of('关键点', elements.get('关键点列表', [])),
    }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# 加载博主历史数据
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
+def load_blogger_history_v8(history_dir: str, target_post_id: str) -> Dict:
|
|
|
|
|
+ """加载博主历史数据 - V8版本"""
|
|
|
|
|
+ history_posts = []
|
|
|
|
|
+
|
|
|
|
|
+ for filename in os.listdir(history_dir):
|
|
|
|
|
+ if filename.endswith('.json'):
|
|
|
|
|
+ post_id = filename.replace('.json', '')
|
|
|
|
|
+ # 只过滤掉当前帖子本身(按ID)
|
|
|
|
|
+ if post_id != target_post_id:
|
|
|
|
|
+ filepath = os.path.join(history_dir, filename)
|
|
|
|
|
+ with open(filepath, 'r', encoding='utf-8') as f:
|
|
|
|
|
+ data = json.load(f)
|
|
|
|
|
+ history_posts.append(data)
|
|
|
|
|
+
|
|
|
|
|
+ # 按时间排序
|
|
|
|
|
+ history_posts.sort(key=lambda x: x.get('publish_timestamp', 0))
|
|
|
|
|
+
|
|
|
|
|
+ return {
|
|
|
|
|
+ "历史帖子数": len(history_posts),
|
|
|
|
|
+ "历史帖子列表": history_posts
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# 拆步骤分析(复用之前的逻辑)
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
async def analyze_point_step_by_step(
    point: Dict,
    all_points: Dict[str, List[Dict]],
    blogger_history: Dict,
    account_name: str
):
    """Step-by-step source tracing for a single point (Step 1 only, for now).

    V9: the derivable (A-type) sources are chosen from the point-dependency
    rules:
      - 灵感点 (inspiration) can be derived from 目的点 (purpose)
      - 目的点 can be derived from 灵感点
      - 关键点 (key point) can be derived from 灵感点 and 目的点
    Points of the same type never derive each other.

    Args:
        point: the point under analysis, with 'type', 'id' and 'name'.
        all_points: output of extract_all_points_v9, used to build A-sources.
        blogger_history: output of load_blogger_history_v8 (B-source data).
        account_name: blogger account name, used only in the prompt context.

    Returns:
        {"灵感点": <point name>, "step1_来源可能性分析": <parsed Step-1 JSON>}
        NOTE(review): the result key is hard-coded to "灵感点" even when the
        analyzed point is a 目的点/关键点 — kept unchanged for downstream
        compatibility; confirm before renaming.
    """
    print(f"\n{'='*80}")
    print(f"拆步骤溯源分析: {point['id']} - {point['name']}")
    print(f"{'='*80}")

    # ---- Determine derivable (A-type) sources from the dependency rules ----
    point_type = point['type']
    derivable_sources = {}

    if point_type == '灵感点':
        derivable_sources['目的点'] = all_points.get('目的点', [])
    elif point_type == '目的点':
        derivable_sources['灵感点'] = all_points.get('灵感点', [])
    elif point_type == '关键点':
        derivable_sources['灵感点'] = all_points.get('灵感点', [])
        derivable_sources['目的点'] = all_points.get('目的点', [])

    # ---- Build the shared multimodal context ----
    content = []

    # The point itself: name only, so WHAT-stage intermediates (descriptions,
    # evidence in the post) cannot bias the HOW analysis.
    content.append({
        "type": "input_text",
        "text": f"""
# 待溯源的{point_type}

**名称**: {point['name']}

**说明**: 这是从what解构中提取的{point_type}名称,请分析这个{point_type}是如何产生的。
"""
    })

    # A-source section.
    # BUGFIX: the old code branched on `if derivable_sources:` — but for the
    # three point types that dict always has a key, possibly mapping to an
    # empty list (e.g. an inspiration point when WHAT produced no purpose
    # point), so NEITHER the source list nor the required "no derivable
    # sources" notice was emitted. Branch on actual source points instead,
    # matching STEP1_PROMPT's contract.
    sources_info = []
    for source_type, source_points in derivable_sources.items():
        if source_points:
            sources_info.append(f"\n## {source_type}")
            for p in source_points:
                sources_info.append(f"- {p['id']}: {p['name']}")

    if sources_info:
        content.append({
            "type": "input_text",
            "text": f"""
---

# 可推导来源(A类来源)

根据点依赖关系,当前{point_type}可以从以下类型的点推导:
{''.join(sources_info)}

**注意**: 同类型的点不能互相推导。
"""
        })
    else:
        # No other-type points available — tell the agent A is impossible.
        content.append({
            "type": "input_text",
            "text": f"""
---

# 可推导来源(A类来源)

当前帖子中没有可用于推导{point_type}的其他类型的点。
因此A类来源可能性为:无
"""
        })

    # ---- Blogger history (potential B-source), multimodal ----
    history_posts = blogger_history.get('历史帖子列表', [])
    content.append({
        "type": "input_text",
        "text": f"""
---

# 博主历史信息(可能的输入来源B)

**账号名称**: {account_name}
**历史帖子数量**: {len(history_posts)} 个

以下是博主的所有历史帖子(按发布时间排序):
"""
    })

    # One header + multimodal body per history post.
    for idx, hist_post in enumerate(history_posts, 1):
        content.append({
            "type": "input_text",
            "text": f"\n## 历史帖子 {idx}/{len(history_posts)}\n"
        })
        content.extend(build_post_multimodal_content(hist_post))

    # ---- Step 1: source-type screening ----
    print(f"\n{'='*60}")
    print("Step 1: 来源类型初筛")
    print(f"{'='*60}")

    step1_messages = [{
        "role": "user",
        "content": content + [{
            "type": "input_text",
            "text": "\n---\n\n请根据以上信息,判断这个点最可能来自哪些来源类型(只选1-3个最可能的)。"
        }]
    }]

    # custom_span attaches per-point metadata to the trace for observability.
    with custom_span(
        name=f"Step1: {point['id']}",
        data={
            "point_id": point['id'],
            "point_name": point['name'],
            "point_type": point_type,
            "step": "来源类型初筛",
            "可推导来源数": sum(len(v) for v in derivable_sources.values())
        }
    ):
        result1 = await Runner.run(step1_agent, input=step1_messages)
        print(f"\n✅ Step 1 结果:\n{result1.final_output[:300]}...\n")

    step1_result = extract_json(result1.final_output)

    # TODO: Steps 2-4 (per-source deconstruction via step2_b_agent /
    # step2_c_agent, path validation via step3_agent, synthesis via
    # step4_agent) are intentionally disabled until Step 1 is solid.
    # Their agents stay defined above for re-enablement.
    return {
        "灵感点": point['name'],
        "step1_来源可能性分析": step1_result
    }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def extract_json(text: str) -> Dict:
    """Extract a JSON object from model output.

    Accepts a ```json``` fence, a plain ``` fence, or bare JSON. On any
    parse/extraction failure the raw text is preserved under "原始输出" so
    the model's output is never lost.
    """
    try:
        if "```json" in text:
            start = text.index("```json") + 7
            end = text.index("```", start)
            candidate = text[start:end].strip()
        elif "```" in text:
            start = text.index("```") + 3
            end = text.index("```", start)
            candidate = text[start:end].strip()
        else:
            candidate = text
        return json.loads(candidate)
    # BUGFIX: was a bare `except:` which also swallowed KeyboardInterrupt/
    # SystemExit. ValueError covers both str.index misses (unclosed fence)
    # and json.JSONDecodeError (its subclass).
    except ValueError:
        return {"原始输出": text}
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# Main
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
async def main(current_time, log_url):
    """Run the V9 HOW-deconstruction pipeline for one post.

    Reads the WHAT result (path from argv[1], or a default test file), loads
    the blogger's history, extracts the three point types, runs the
    per-point source analysis, and writes the result JSON to
    ``examples_new/{账号}/how_output/``.

    Args:
        current_time: timestamp from set_trace() (currently unused here).
        log_url: trace log URL, embedded into the output JSON.
    """
    import sys

    # Default test file used when no CLI argument is provided.
    DEFAULT_TEST_FILE = "examples_new/阿里多多酱/output/685b593800000000120141d3_20251104_111017.json"

    # Argument parsing.
    if len(sys.argv) < 2:
        print(f"未提供参数,使用默认测试文件: {DEFAULT_TEST_FILE}")
        what_result_file = DEFAULT_TEST_FILE
    else:
        what_result_file = sys.argv[1]

    # Parse the file name: {帖子ID}_{运行日期}_{运行时间}.json
    filename = os.path.basename(what_result_file)
    filename_without_ext = filename.replace('.json', '')
    parts = filename_without_ext.split('_')

    if len(parts) < 3:
        # BUGFIX: the message previously printed a literal "(unknown)"
        # instead of the offending file name.
        print(f"❌ 文件名格式不正确: {filename}")
        print("期望格式: {帖子ID}_{运行日期}_{运行时间}.json")
        sys.exit(1)

    post_id = parts[0]
    run_date = parts[1]
    run_time = parts[2]

    print("="*80)
    print("HOW 解构 V9 - 点依赖关系处理")
    print("="*80)
    print(f"\n目标帖子ID: {post_id}")
    print(f"运行日期: {run_date}")
    print(f"运行时间: {run_time}")

    # Read the WHAT deconstruction result.
    what_result = read_json(what_result_file)
    if not what_result:
        print(f"❌ 无法读取文件: {what_result_file}")
        sys.exit(1)

    # Extract the account name from the input path.
    # Expected layout: examples_new/{账号名}/output/{帖子ID}_{运行日期}_{运行时间}.json
    path_parts = what_result_file.split('/')
    if len(path_parts) >= 3 and path_parts[0] == 'examples_new':
        author_name = path_parts[1]
    else:
        print(f"❌ 无法从路径中提取账号名称: {what_result_file}")
        sys.exit(1)

    # Build the directory layout.
    base_dir = f"examples_new/{author_name}"
    history_dir = f"{base_dir}/作者历史帖子"

    # Read the target post (only to resolve the channel account name).
    target_post_file = f"{history_dir}/{post_id}.json"
    target_post = read_json(target_post_file)
    account_name = target_post.get('channel_account_name', author_name) if target_post else author_name

    # Load the blogger's history (excludes the target post itself).
    print(f"\n加载博主历史数据...")
    blogger_history = load_blogger_history_v8(history_dir, post_id)
    print(f"✓ 已加载 {blogger_history['历史帖子数']} 个历史帖子")

    # Extract all three point types — even if only a subset were analyzed,
    # all points are needed to resolve the dependency relations.
    all_points = extract_all_points_v9(what_result)

    # Configuration: which point types to analyze in this run.
    analyze_types = ['灵感点', '目的点', '关键点']  # all three types

    # Report extraction statistics.
    print(f"\n从 WHAT 解构中提取的点:")
    for point_type in ['灵感点', '目的点', '关键点']:
        count = len(all_points[point_type])
        print(f"  - {point_type}: {count} 个")

    # List the points in scope for this run.
    print(f"\n本次分析范围: {', '.join(analyze_types)}")
    points_to_analyze = []
    for point_type in analyze_types:
        for point in all_points[point_type]:
            points_to_analyze.append(point)
            print(f"  - {point['id']}: {point['name']}")

    # Per-point source tracing.
    source_analysis_results = []

    for idx, point in enumerate(points_to_analyze, 1):
        # One custom span per point so each analysis shows up in the trace.
        point_name_short = point['name'][:30] + "..." if len(point['name']) > 30 else point['name']
        with custom_span(
            name=f"{point['id']}: {point_name_short}",
            data={
                "point_index": f"{idx}/{len(points_to_analyze)}",
                "point_id": point['id'],
                "point_type": point['type'],
                "point_name": point['name'],
                "analysis_stage": "HOW解构溯源"
            }
        ):
            result = await analyze_point_step_by_step(
                point, all_points, blogger_history, account_name
            )
            source_analysis_results.append(result)

        # Small delay between points to avoid API rate limiting.
        await asyncio.sleep(2)

    # Persist results as {帖子ID}_{日期}_{时间}.json under how_output/.
    now = datetime.now()
    output_filename = f"{post_id}_{now.strftime('%Y%m%d')}_{now.strftime('%H%M%S')}.json"
    output_dir = f"{base_dir}/how_output"
    os.makedirs(output_dir, exist_ok=True)
    output_file = f"{output_dir}/{output_filename}"

    # Per-type point counts for the summary section.
    points_stats = {
        point_type: len(all_points[point_type])
        for point_type in ['灵感点', '目的点', '关键点']
    }

    final_result = {
        "how_解构_V9": {
            "版本说明": "V9 - 点依赖关系处理,支持机器可模拟的推测路径",
            "目标帖子ID": post_id,
            "运行时间": now.strftime('%Y-%m-%d %H:%M:%S'),
            "log_url": log_url,
            "历史数据统计": {
                "历史帖子数": blogger_history['历史帖子数'],
                "数据格式": "多模态(图片 + 结构化文本)"
            },
            "点统计": points_stats,
            "分析范围": ', '.join(analyze_types),
            "分析数量": len(points_to_analyze),
            "点依赖关系": {
                "说明": "灵感点 ↔ 目的点(双向互推),灵感点,目的点 → 关键点(单向推导)",
                "规则": "同类型的点不能互相推导"
            },
            "溯源分析结果": source_analysis_results
        }
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(final_result, f, ensure_ascii=False, indent=2)

    print("\n" + "="*80)
    print(f"✓ V9 溯源分析完成!结果已保存到:")
    print(f"  {output_file}")
    print("="*80)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
if __name__ == "__main__":
    # set_trace() initialises tracing and returns (current_time, log_url);
    # both are threaded through main() into the saved output JSON.
    current_time, log_url = set_trace()
    with trace("how decode v9"):
        asyncio.run(main(current_time, log_url))
|