|
|
@@ -1,10 +1,11 @@
|
|
|
"""
|
|
|
-主流程脚本:串联 Step1 和 Step2
|
|
|
+主流程脚本:串联 Step1、搜索和 Step2
|
|
|
|
|
|
执行完整的灵感分析流程:
|
|
|
1. Step1: 灵感与人设匹配(调用 step1 main,自动保存结果)
|
|
|
-2. Step2: 增量词在人设中的匹配(调用 step2 main,自动保存结果)
|
|
|
-3. 生成流程汇总文件
|
|
|
+2. Step1.5: 基于 Top1 匹配要素进行小红书搜索(使用 search_xiaohongshu)
|
|
|
+3. Step2: 增量词在人设中的匹配(调用 step2 main,自动保存结果)
|
|
|
+4. 生成流程汇总文件
|
|
|
"""
|
|
|
import os
|
|
|
import sys
|
|
|
@@ -22,6 +23,9 @@ from lib.utils import read_json
|
|
|
import step1_inspiration_match
|
|
|
import step2_incremental_match
|
|
|
|
|
|
+# 导入搜索功能
|
|
|
+from script.search import search_xiaohongshu
|
|
|
+
|
|
|
|
|
|
def find_step1_output(persona_dir: str, inspiration: str, max_tasks: int = None) -> str:
|
|
|
"""查找 step1 输出文件
|
|
|
@@ -71,15 +75,89 @@ def find_step2_output(persona_dir: str, inspiration: str, max_tasks: int = None)
|
|
|
return str(step2_files[0])
|
|
|
|
|
|
|
|
|
def get_inspiration_score(persona_dir: str, inspiration: str, max_tasks: int = None) -> float:
    """Return the Step1 Top1 score recorded for an inspiration.

    Args:
        persona_dir: Persona directory.
        inspiration: Inspiration name.
        max_tasks: Task-count limit; passed through to ``find_step1_output``.

    Returns:
        The ``score`` of the first entry of ``匹配结果列表`` as a float.
        ``0.0`` when that list is empty, or when the score is missing, null
        or not convertible to a number.
        ``-1.0`` when locating, reading or traversing the Step1 result raises
        any exception (callers treat a negative value as "no result").
    """
    try:
        step1_file = find_step1_output(persona_dir, inspiration, max_tasks)
        step1_data = read_json(step1_file)
        results = step1_data.get("匹配结果列表", [])
        if not results:
            return 0.0
        raw_score = results[0].get('匹配结果', {}).get('score', 0)
    except Exception:
        # Deliberate best-effort: any failure to obtain the result (missing
        # file, unreadable JSON, unexpected structure) maps to the sentinel.
        return -1.0

    # A null or non-numeric score must not leak to the caller, which compares
    # it with ``>= 0`` and formats it with ``:.2f``.
    try:
        return float(raw_score)
    except (TypeError, ValueError):
        return 0.0
|
|
|
+
|
|
|
+
|
|
|
def sort_inspirations_by_score(
    persona_dir: str,
    inspiration_list: list,
    max_tasks: int = None
) -> list:
    """Order inspirations by their Step1 Top1 score, highest first.

    Looks up each inspiration's score with ``get_inspiration_score``, prints
    how many had a result and a preview of the first ten after sorting.

    Args:
        persona_dir: Persona directory.
        inspiration_list: Inspiration names to order.
        max_tasks: Task-count limit; passed through to the score lookup.

    Returns:
        The inspiration names sorted by score in descending order, with
        inspirations whose score is negative (no result) placed last.
    """
    rule = '─' * 80
    print(f"\n{rule}")
    print("正在读取现有 Step1 结果文件...")
    print(rule)

    # (name, score, found) triples; a negative score means "no Step1 result".
    entries = []
    for name in inspiration_list:
        value = get_inspiration_score(persona_dir, name, max_tasks)
        entries.append((name, value, value >= 0))

    found_total = len([entry for entry in entries if entry[2]])
    print(f"找到 {found_total}/{len(inspiration_list)} 个灵感的 Step1 结果")

    # Descending on (found, score). sorted() is stable even with reverse=True,
    # so entries with equal keys keep their input order.
    ranked = sorted(entries, key=lambda entry: (entry[2], entry[1]), reverse=True)

    print(f"\n排序后的灵感列表(前10个):")
    for position, (name, value, found) in enumerate(ranked[:10], 1):
        if found:
            label = f"score={value:.2f}"
        else:
            label = "无结果"
        print(f" {position}. [{label}] {name}")

    remaining = len(ranked) - 10
    if remaining > 0:
        print(f" ... 还有 {remaining} 个")

    return [name for name, _, _ in ranked]
|
|
|
+
|
|
|
+
|
|
|
async def run_full_analysis(
|
|
|
persona_dir: str,
|
|
|
inspiration: str,
|
|
|
max_tasks: int = None,
|
|
|
force: bool = False,
|
|
|
current_time: str = None,
|
|
|
- log_url: str = None
|
|
|
+ log_url: str = None,
|
|
|
+ enable_step2: bool = False
|
|
|
) -> dict:
|
|
|
- """执行完整的灵感分析流程(Step1 + Step2)
|
|
|
+ """执行完整的灵感分析流程(Step1 + 搜索 + Step2)
|
|
|
|
|
|
Args:
|
|
|
persona_dir: 人设目录路径
|
|
|
@@ -88,6 +166,7 @@ async def run_full_analysis(
|
|
|
force: 是否强制重新执行(跳过文件存在检查)
|
|
|
current_time: 当前时间戳
|
|
|
log_url: 日志链接
|
|
|
+ enable_step2: 是否执行 Step2(默认 False)
|
|
|
|
|
|
Returns:
|
|
|
包含文件路径和状态的字典
|
|
|
@@ -138,34 +217,75 @@ async def run_full_analysis(
|
|
|
step1_element = step1_top1.get("业务信息", {}).get("匹配要素", "")
|
|
|
print(f"Top1 匹配要素: {step1_element}, score: {step1_score:.2f}")
|
|
|
|
|
|
- # ========== Step2: 增量词匹配 ==========
|
|
|
+ # ========== Step1.5: 小红书搜索 ==========
|
|
|
print(f"\n{'─' * 80}")
|
|
|
- print(f"Step2: 增量词在人设中的匹配")
|
|
|
+ print(f"Step1.5: 基于 Top1 匹配要素进行小红书搜索")
|
|
|
print(f"{'─' * 80}\n")
|
|
|
|
|
|
- # 临时修改 sys.argv 来传递参数给 step2
|
|
|
- sys.argv = [
|
|
|
- "step2_incremental_match.py",
|
|
|
- persona_dir,
|
|
|
- inspiration
|
|
|
- ]
|
|
|
+ search_keyword = step1_element
|
|
|
+ print(f"搜索关键词: {search_keyword}")
|
|
|
|
|
|
+ # 执行搜索
|
|
|
try:
|
|
|
- # 调用 step2 的 main 函数(通过参数传递 force)
|
|
|
- await step2_incremental_match.main(current_time, log_url, force=force)
|
|
|
- finally:
|
|
|
- # 恢复原始参数
|
|
|
- sys.argv = original_argv
|
|
|
+ search_result = search_xiaohongshu(search_keyword)
|
|
|
+ search_notes_count = len(search_result.get('notes', []))
|
|
|
+ print(f"✓ 搜索完成,找到 {search_notes_count} 条笔记")
|
|
|
+
|
|
|
+ # 保存搜索结果
|
|
|
+ search_dir = os.path.join(persona_dir, "how", "灵感点", inspiration, "search")
|
|
|
+ os.makedirs(search_dir, exist_ok=True)
|
|
|
|
|
|
- # 查找 step2 输出文件
|
|
|
- step2_file = find_step2_output(persona_dir, inspiration, max_tasks)
|
|
|
- print(f"✓ Step2 完成,结果文件: {step2_file}\n")
|
|
|
+ scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
|
|
|
+ search_filename = f"{scope_prefix}_search_{search_keyword[:20]}.json" # 截取关键词前20字符避免文件名过长
|
|
|
+ search_file = os.path.join(search_dir, search_filename)
|
|
|
|
|
|
- # 读取 step2 结果
|
|
|
- step2_data = read_json(step2_file)
|
|
|
- step2_score = step2_data.get("匹配结果", {}).get("score", 0)
|
|
|
- step2_b_content = step2_data.get("输入信息", {}).get("B", "")
|
|
|
- step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
|
|
|
+ with open(search_file, 'w', encoding='utf-8') as f:
|
|
|
+ json.dump(search_result, f, ensure_ascii=False, indent=2)
|
|
|
+
|
|
|
+ print(f"✓ 搜索结果已保存: {search_file}\n")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ print(f"⚠️ 搜索失败: {e}")
|
|
|
+ search_file = None
|
|
|
+ search_notes_count = 0
|
|
|
+
|
|
|
+ # ========== Step2: 增量词匹配 ==========
|
|
|
+ step2_file = None
|
|
|
+ step2_score = None
|
|
|
+ step2_word_count = None
|
|
|
+
|
|
|
+ if enable_step2:
|
|
|
+ print(f"\n{'─' * 80}")
|
|
|
+ print(f"Step2: 增量词在人设中的匹配")
|
|
|
+ print(f"{'─' * 80}\n")
|
|
|
+
|
|
|
+ # 临时修改 sys.argv 来传递参数给 step2
|
|
|
+ sys.argv = [
|
|
|
+ "step2_incremental_match.py",
|
|
|
+ persona_dir,
|
|
|
+ inspiration
|
|
|
+ ]
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 调用 step2 的 main 函数(通过参数传递 force)
|
|
|
+ await step2_incremental_match.main(current_time, log_url, force=force)
|
|
|
+ finally:
|
|
|
+ # 恢复原始参数
|
|
|
+ sys.argv = original_argv
|
|
|
+
|
|
|
+ # 查找 step2 输出文件
|
|
|
+ step2_file = find_step2_output(persona_dir, inspiration, max_tasks)
|
|
|
+ print(f"✓ Step2 完成,结果文件: {step2_file}\n")
|
|
|
+
|
|
|
+ # 读取 step2 结果
|
|
|
+ step2_data = read_json(step2_file)
|
|
|
+ step2_score = step2_data.get("匹配结果", {}).get("score", 0)
|
|
|
+ step2_b_content = step2_data.get("输入信息", {}).get("B", "")
|
|
|
+ step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
|
|
|
+ else:
|
|
|
+ print(f"\n{'─' * 80}")
|
|
|
+ print(f"Step2: 已跳过(使用 --enable-step2 启用)")
|
|
|
+ print(f"{'─' * 80}\n")
|
|
|
|
|
|
# ========== 保存流程汇总 ==========
|
|
|
output_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
|
|
|
@@ -178,22 +298,30 @@ async def run_full_analysis(
|
|
|
summary_filename = f"{scope_prefix}_summary_完整流程_{model_short}.json"
|
|
|
summary_file = os.path.join(output_dir, summary_filename)
|
|
|
|
|
|
+ # 构建流程描述
|
|
|
+ workflow = "Step1 + 搜索"
|
|
|
+ if enable_step2:
|
|
|
+ workflow += " + Step2"
|
|
|
+
|
|
|
summary = {
|
|
|
"元数据": {
|
|
|
"current_time": current_time,
|
|
|
"log_url": log_url,
|
|
|
- "流程": "Step1 + Step2 完整分析",
|
|
|
+ "流程": workflow,
|
|
|
"step1_model": step1_data.get("元数据", {}).get("model", ""),
|
|
|
- "step2_model": step2_data.get("元数据", {}).get("model", "")
|
|
|
+ "step2_model": step2_data.get("元数据", {}).get("model", "") if enable_step2 and 'step2_data' in locals() else None
|
|
|
},
|
|
|
"灵感": inspiration,
|
|
|
"文件路径": {
|
|
|
"step1": step1_file,
|
|
|
+ "search": search_file if 'search_file' in locals() else None,
|
|
|
"step2": step2_file
|
|
|
},
|
|
|
"关键指标": {
|
|
|
"step1_top1_score": step1_score,
|
|
|
"step1_top1_匹配要素": step1_element,
|
|
|
+ "search_keyword": search_keyword if 'search_keyword' in locals() else None,
|
|
|
+ "search_notes_count": search_notes_count if 'search_notes_count' in locals() else 0,
|
|
|
"step2_增量词数量": step2_word_count,
|
|
|
"step2_score": step2_score
|
|
|
}
|
|
|
@@ -206,12 +334,16 @@ async def run_full_analysis(
|
|
|
print(f"完整流程执行完成")
|
|
|
print(f"{'=' * 80}")
|
|
|
print(f"\n结果文件:")
|
|
|
- print(f" Step1: {step1_file}")
|
|
|
- print(f" Step2: {step2_file}")
|
|
|
- print(f" 汇总: {summary_file}\n")
|
|
|
+ print(f" Step1: {step1_file}")
|
|
|
+ if 'search_file' in locals() and search_file:
|
|
|
+ print(f" 搜索: {search_file}")
|
|
|
+ if enable_step2 and step2_file:
|
|
|
+ print(f" Step2: {step2_file}")
|
|
|
+ print(f" 汇总: {summary_file}\n")
|
|
|
|
|
|
return {
|
|
|
"step1_file": step1_file,
|
|
|
+ "search_file": search_file if 'search_file' in locals() else None,
|
|
|
"step2_file": step2_file,
|
|
|
"summary_file": summary_file,
|
|
|
"status": "success"
|
|
|
@@ -222,16 +354,22 @@ async def main():
|
|
|
"""主函数"""
|
|
|
# 解析命令行参数
|
|
|
parser = argparse.ArgumentParser(
|
|
|
- description="灵感分析主流程 (Step1 + Step2)",
|
|
|
+ description="灵感分析主流程 (Step1 + 搜索 + Step2)",
|
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
|
epilog="""
|
|
|
使用示例:
|
|
|
- # 处理第1个灵感
|
|
|
+ # 处理第1个灵感(Step1 + 搜索,默认不执行 Step2)
|
|
|
python run_inspiration_analysis.py --dir data/阿里多多酱/out/人设_1110 --count 1
|
|
|
|
|
|
+ # 启用 Step2 完整流程(Step1 + 搜索 + Step2)
|
|
|
+ python run_inspiration_analysis.py --count 1 --enable-step2
|
|
|
+
|
|
|
# 随机处理5个灵感
|
|
|
python run_inspiration_analysis.py --count 5 --shuffle
|
|
|
|
|
|
+ # 按 Step1 分数排序,处理前10个高分灵感
|
|
|
+ python run_inspiration_analysis.py --count 10 --sort-by-score
|
|
|
+
|
|
|
# 处理所有灵感,强制重新执行
|
|
|
python run_inspiration_analysis.py --count all --force
|
|
|
|
|
|
@@ -271,17 +409,36 @@ async def main():
|
|
|
help="随机选择灵感,而不是按顺序"
|
|
|
)
|
|
|
|
|
|
+ parser.add_argument(
|
|
|
+ "--sort-by-score",
|
|
|
+ action="store_true",
|
|
|
+ help="根据 Step1 结果分数排序(降序),优先处理高分灵感"
|
|
|
+ )
|
|
|
+
|
|
|
+ parser.add_argument(
|
|
|
+ "--enable-step2",
|
|
|
+ action="store_true",
|
|
|
+ help="启用 Step2 增量词匹配(默认关闭)"
|
|
|
+ )
|
|
|
+
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
persona_dir = args.dir
|
|
|
force = args.force
|
|
|
shuffle = args.shuffle
|
|
|
+ sort_by_score = args.sort_by_score
|
|
|
+ enable_step2 = args.enable_step2
|
|
|
|
|
|
# 处理 max_tasks
|
|
|
max_tasks = None if args.max_tasks == "all" else int(args.max_tasks)
|
|
|
|
|
|
+ # 动态流程名称
|
|
|
+ workflow_name = "Step1 + 搜索"
|
|
|
+ if enable_step2:
|
|
|
+ workflow_name += " + Step2"
|
|
|
+
|
|
|
print(f"{'=' * 80}")
|
|
|
- print(f"灵感分析主流程 (Step1 + Step2)")
|
|
|
+ print(f"灵感分析主流程 ({workflow_name})")
|
|
|
print(f"{'=' * 80}")
|
|
|
print(f"人设目录: {persona_dir}")
|
|
|
|
|
|
@@ -305,8 +462,20 @@ async def main():
|
|
|
if shuffle:
|
|
|
print(f"随机模式: 随机选择灵感")
|
|
|
|
|
|
+ if sort_by_score:
|
|
|
+ print(f"分数排序: 根据 Step1 结果按分数降序处理")
|
|
|
+
|
|
|
+ if enable_step2:
|
|
|
+ print(f"Step2: 启用增量词匹配")
|
|
|
+ else:
|
|
|
+ print(f"Step2: 已关闭(使用 --enable-step2 启用)")
|
|
|
+
|
|
|
# 选择要处理的灵感列表
|
|
|
- if shuffle:
|
|
|
+ if sort_by_score:
|
|
|
+ # 根据 Step1 结果分数排序
|
|
|
+ sorted_list = sort_inspirations_by_score(persona_dir, inspiration_list, max_tasks)
|
|
|
+ inspirations_to_process = sorted_list[:inspiration_count]
|
|
|
+ elif shuffle:
|
|
|
# 随机打乱灵感列表后选择
|
|
|
shuffled_list = inspiration_list.copy()
|
|
|
random.shuffle(shuffled_list)
|
|
|
@@ -336,7 +505,8 @@ async def main():
|
|
|
max_tasks=max_tasks,
|
|
|
force=force,
|
|
|
current_time=insp_time,
|
|
|
- log_url=insp_log_url
|
|
|
+ log_url=insp_log_url,
|
|
|
+ enable_step2=enable_step2
|
|
|
)
|
|
|
|
|
|
results.append(result)
|