| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295 |
- """
- 知识需求生成Agent的完整测试脚本
- 测试KnowledgeRequirementGenerationAgent的新输出格式(PRD 1.4)
- 新格式包含:
- - 整体项目目标
- - 本次任务目标
- - 上下文
- - 待解构帖子信息
- - 需求(内容知识需求 + 工具知识需求)
- """
- import sys
- import json
- import os
- from pathlib import Path
- from dotenv import load_dotenv
- # 添加项目根目录到路径
- project_root = Path(__file__).parent.parent.parent
- sys.path.insert(0, str(project_root))
- # 加载环境变量
- load_dotenv(project_root / ".env")
- from langchain.chat_models import init_chat_model
- from src.components.agents.knowledge_requirement_agent import generate_knowledge_requirements
- from src.utils.logger import get_logger
- logger = get_logger(__name__)
def load_test_data(file_path: str) -> dict:
    """Load test data from a JSON file.

    Args:
        file_path: Path to a UTF-8 encoded JSON file.

    Returns:
        The parsed JSON data.
    """
    with open(file_path, mode="r", encoding="utf-8") as json_file:
        raw_text = json_file.read()
    return json.loads(raw_text)
def read_prd_content(pdf_path: str) -> str:
    """Read a PRD PDF and return its text, with pages joined by newlines.

    PyMuPDF (``pymupdf``) is tried first; when it is not installed the
    function falls back to ``PyPDF2``.

    Args:
        pdf_path: Path to the PDF file.

    Returns:
        The text of every page, joined with newline characters.

    Raises:
        ImportError: If neither ``pymupdf`` nor ``PyPDF2`` can be imported.
        Exception: Any error raised while opening or parsing the PDF is
            logged and re-raised unchanged.
    """
    # Resolve the backend first so that only a genuinely missing library
    # triggers the fallback, and keep the import errors chained.
    try:
        import pymupdf  # PyMuPDF
    except ImportError:
        pymupdf = None
        logger.warning("未安装 pymupdf 库,使用 PyPDF2 作为备选方案")
        try:
            from PyPDF2 import PdfReader
        except ImportError as import_error:
            logger.error("未安装 PDF 读取库 (pymupdf 或 PyPDF2),无法读取PDF文件")
            raise ImportError(
                "请安装 pymupdf (推荐) 或 PyPDF2: pip install pymupdf 或 pip install PyPDF2"
            ) from import_error

    try:
        if pymupdf is not None:
            # The context manager closes the document even when text
            # extraction fails part-way through.
            with pymupdf.open(pdf_path) as doc:
                page_texts = [page.get_text() for page in doc]
            backend_label = ""
        else:
            reader = PdfReader(pdf_path)
            # Guard against pages that yield no text so the join below
            # cannot fail on a non-string value.
            page_texts = [page.extract_text() or "" for page in reader.pages]
            backend_label = " (PyPDF2)"
    except Exception as e:
        # Read failures from either backend are logged here before
        # propagating to the caller.
        logger.error(f"读取PDF文件失败: {e}", exc_info=True)
        raise

    full_text = "\n".join(page_texts)
    logger.info(
        f"成功读取PDF文件{backend_label}: {pdf_path}, 内容长度: {len(full_text)} 字符"
    )
    return full_text
def format_post_content(raw_data: dict) -> dict:
    """Reshape a raw post record into the structure the agent consumes.

    Missing keys fall back to an empty string, an empty list, or zero,
    depending on the field.

    Args:
        raw_data: The raw post data.

    Returns:
        A dict with ``text``, ``images`` and ``metadata`` sections.
    """
    field = raw_data.get

    text_section = {
        "title": field("title", ""),
        "body": field("body_text", ""),
        "hashtags": [],  # could be extracted from body_text
    }
    image_list = field("images", [])
    metadata_section = {
        "link": field("link", ""),
        "content_id": field("channel_content_id", ""),
        "account_name": field("channel_account_name", ""),
        "content_type": field("content_type", ""),
        "comment_count": field("comment_count", 0),
        "like_count": field("like_count", 0),
        "collect_count": field("collect_count", 0),
    }

    formatted = {}
    formatted["text"] = text_section
    formatted["images"] = image_list
    formatted["metadata"] = metadata_section
    return formatted
def test_complete_knowledge_requirement_generation():
    """End-to-end test of knowledge requirement generation (new format, PRD 1.4).

    Scenarios covered:
        1. Whole-post deconstruction.
        2. The output contains every required section heading.
        3. Requirements are split into content knowledge and tool knowledge.

    Steps:
        1. Initialise the LLM.
        2. Load the PRD content.
        3. Load the test post data.
        4. Define the task stage.
        5. Generate the knowledge requirements.
        6. Display the result.
        7. Validate the format.
        8. Save the Markdown document.
        9. Print the test summary.

    Returns:
        The object returned by ``generate_knowledge_requirements``.

    Raises:
        AssertionError: If the generated Markdown document lacks any of the
            required section headings. The document is still saved and the
            summary printed before the error is raised.
    """
    print("\n" + "=" * 100)
    print("📝 完整测试:KnowledgeRequirementGenerationAgent(新格式 - PRD 1.4)")
    print(" - 输入:PRD文档 + 小红书帖子 + 解构上下文 + 任务阶段")
    print(" - 输出:结构化知识需求文档(包含项目目标、任务目标、上下文、需求等)")
    print("=" * 100)

    # Step 1: initialise the LLM
    print("\n🤖 Step 1: 初始化LLM (Gemini)...")
    # Mirror GEMINI_API_KEY into GOOGLE_API_KEY when it is set.
    google_api_key = os.getenv("GEMINI_API_KEY")
    if google_api_key:
        os.environ["GOOGLE_API_KEY"] = google_api_key
    llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")
    print(" ✓ LLM初始化成功")

    # Step 2: load the PRD content
    print("\n📄 Step 2: 加载PRD内容...")
    prd_path = project_root / "prd1.4.pdf"
    prd_content = read_prd_content(str(prd_path))
    print(f" ✓ PRD加载成功,内容长度: {len(prd_content)} 字符")
    # Only a 200-character preview is shown, as the label promises.
    print(f" PRD摘要(前200字): {prd_content[:200]}...")

    # Step 3: load the test post data
    print("\n📁 Step 3: 加载测试帖子数据...")
    test_data_path = project_root / "examples/测试数据/阿里多多酱/待解构帖子.json"
    raw_data = load_test_data(str(test_data_path))
    post_content = format_post_content(raw_data)
    print(" ✓ 帖子数据加载成功")
    print(f" - 标题: {raw_data.get('title', 'N/A')}")
    print(f" - 图片数量: {len(raw_data.get('images', []))}张")
    print(f" - 点赞数: {raw_data.get('like_count', 0)}")

    # Step 4: define the task stage
    print("\n🎯 Step 4: 定义任务阶段...")
    task_stage = "帖子整体解构"
    print(f" ✓ 任务阶段: {task_stage}")
    print(" - 说明: 确定帖子的描述维度(品类、主题、脚本、内容亮点、情绪共鸣点等)")

    # Step 5: generate the knowledge requirements
    print("\n⚙️ Step 5: 创建Agent并生成知识需求...")
    print(" 选项:启用知识检索 = False(加快测试速度)")
    result = generate_knowledge_requirements(
        llm=llm,
        prd_content=prd_content,
        post_content=post_content,
        task_stage=task_stage,
        enable_retrieval=False,  # True enables knowledge retrieval but is slower
    )

    # Step 6: display the result
    print("\n" + "=" * 100)
    print("📊 Step 6: 生成结果展示")
    print("=" * 100)
    print("\n📝 总结:")
    print(f" {result.summary}")
    print("\n📄 Markdown文档:")
    print("-" * 100)
    print(result.markdown_document)
    print("-" * 100)

    # Step 7: validate the format
    print("\n" + "=" * 100)
    print("🔍 Step 7: 格式验证")
    print("=" * 100)
    required_sections = [
        "# 整体项目目标",
        "# 本次任务目标",
        "# 上下文",
        "# 待解构帖子信息",
        "# 需求",
        "## 内容知识需求",
        "### 需求约束",
        "### 需求描述",
        "## 工具知识需求",
    ]
    missing_sections = []
    for section in required_sections:
        if section in result.markdown_document:
            print(f" ✓ 包含章节: {section}")
        else:
            print(f" ✗ 缺少章节: {section}")
            missing_sections.append(section)
    all_passed = not missing_sections
    if all_passed:
        print("\n 🎉 格式验证通过!所有必需章节均存在。")
    else:
        print("\n ⚠️ 格式验证未通过,缺少部分章节。")

    # Step 8: save the Markdown document (done even when validation fails,
    # so the faulty output can be inspected)
    print("\n" + "=" * 100)
    print("💾 Step 8: 保存Markdown文档")
    print("=" * 100)
    output_dir = project_root / "test/outputs"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / "knowledge_requirement_complete.md"
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(result.markdown_document)
    print(f"\n ✓ Markdown文档已保存到: {output_path}")
    print(f" - 文件大小: {len(result.markdown_document)} 字符")

    # Step 9: test summary
    print("\n" + "=" * 100)
    print("✅ 测试完成总结")
    print("=" * 100)
    print(" ✓ 成功读取PRD文档 (prd1.4.pdf)")
    print(" ✓ 成功加载小红书帖子数据")
    print(f" ✓ 成功定义任务阶段: {task_stage}")
    print(" ✓ 成功生成知识需求文档(新格式 - PRD 1.4)")
    print(f" ✓ 格式验证: {'通过' if all_passed else '未通过'}")
    print(f" ✓ 总结: {result.summary}")
    print(f" ✓ Markdown文档大小: {len(result.markdown_document)} 字符")

    # A failed format validation must fail the test instead of being
    # reported as a success by the banner below and by the caller.
    if not all_passed:
        raise AssertionError(f"格式验证未通过,缺少章节: {missing_sections}")

    print("\n" + "🎉" * 50)
    print("测试成功完成! 新输出格式符合 PRD 1.4 要求")
    print("🎉" * 50 + "\n")
    return result
def main():
    """Run the complete test and translate the outcome into an exit code.

    Returns:
        0 when the test finishes without raising, 1 when any exception
        is raised (the failure is logged and its traceback printed).
    """
    import traceback

    rocket_bar = "🚀" * 50
    print("\n" + rocket_bar)
    print("KnowledgeRequirementGenerationAgent 完整测试套件(PRD 1.4 新格式)")
    print(rocket_bar)

    exit_code = 1
    try:
        test_complete_knowledge_requirement_generation()
        print("\n✅ 所有测试通过!")
        exit_code = 0
    except Exception as error:
        logger.error(f"测试失败: {error}", exc_info=True)
        print(f"\n❌ 测试失败: {error}\n")
        traceback.print_exc()
    return exit_code
if __name__ == "__main__":
    # sys.exit is the supported way to set the process exit status; the
    # bare exit() helper is injected by the site module for interactive
    # sessions and is not guaranteed to exist (e.g. under ``python -S``).
    sys.exit(main())
|