@@ -0,0 +1,530 @@
+"""
+Batch processing script: process every post under an author's post-history directory (concurrent version, with historical posts).
+
+Functionality:
+1. Load the 20 most recent posts (sorted by publish_timestamp, newest first).
+2. For each post, load its historical posts (up to the 15 most recent posts published before it).
+3. Process all posts concurrently.
+"""
+
+import json
+import sys
+import os
+import argparse
+from pathlib import Path
+from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import threading
+
+# Add the project root to sys.path
+project_root = Path(__file__).parent.parent
+sys.path.insert(0, str(project_root))
+
+# Manually load the .env file
+def load_env_file(env_path):
+    """Manually parse a .env file into os.environ."""
+    if not env_path.exists():
+        return False
+
+    with open(env_path, 'r') as f:
+        for line in f:
+            line = line.strip()
+            # Skip comments and blank lines
+            if not line or line.startswith('#'):
+                continue
+            # Parse KEY=VALUE pairs
+            if '=' in line:
+                key, value = line.split('=', 1)
+                # Strip optional surrounding quotes from the value
+                os.environ[key.strip()] = value.strip().strip('"\'')
+
+    return True
+
+env_path = project_root / ".env"
+if load_env_file(env_path):
+    print(f"✅ Loaded environment variables from: {env_path}")
+    # Verify the API key
+    api_key = os.environ.get("GEMINI_API_KEY", "")
+    if api_key:
+        print(f"    GEMINI_API_KEY: {api_key[:10]}...")
+else:
+    print(f"⚠️  .env file not found: {env_path}")
+
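+# The parser above accepts a minimal .env file along these lines (GEMINI_API_KEY
+# is the variable checked above; the value is a placeholder):
+#   # comment lines and blank lines are ignored
+#   GEMINI_API_KEY=your-key-here
+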
+from src.workflows.what_deconstruction_workflow import WhatDeconstructionWorkflow
+from src.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+# Lock for thread-safe console output
+print_lock = threading.Lock()
+
+# Thread-safe success/failure counter
+class ThreadSafeCounter:
+    def __init__(self):
+        self._lock = threading.Lock()
+        self._success = 0
+        self._fail = 0
+
+    def increment_success(self):
+        with self._lock:
+            self._success += 1
+
+    def increment_fail(self):
+        with self._lock:
+            self._fail += 1
+
+    @property
+    def success(self):
+        with self._lock:
+            return self._success
+
+    @property
+    def fail(self):
+        with self._lock:
+            return self._fail
+
+def safe_print(*args, **kwargs):
+    """Thread-safe print function."""
+    with print_lock:
+        print(*args, **kwargs)
+
+
+def load_historical_posts(history_dir, target_timestamp=None, target_post_id=None, max_count=15):
+    """
+    Load historical posts (sorted by publish_timestamp, newest first).
+
+    Selects the most recently published posts that predate the target post, excluding the target post itself.
+
+    Args:
+        history_dir: Directory of historical posts
+        target_timestamp: Publish timestamp of the target post (optional)
+        target_post_id: ID of the target post, used to filter out duplicates (optional)
+        max_count: Maximum number of posts to load
+
+    Returns:
+        list: Historical posts (newest first)
+    """
+    history_path = Path(history_dir)
+
+    if not history_path.exists():
+        safe_print(f"⚠️  Historical post directory does not exist: {history_path}")
+        return []
+
+    # Collect all JSON files
+    json_files = list(history_path.glob("*.json"))
+
+    if not json_files:
+        safe_print("⚠️  No historical post files found")
+        return []
+
+    # Read every post and extract its publish_timestamp
+    posts_with_timestamp = []
+    for file_path in json_files:
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                post_data = json.load(f)
+
+            # Fall back to 0 when publish_timestamp is missing
+            timestamp = post_data.get("publish_timestamp", 0)
+            post_id = post_data.get("channel_content_id", "")
+
+            posts_with_timestamp.append({
+                "file_path": file_path,
+                "post_data": post_data,
+                "timestamp": timestamp,
+                "post_id": post_id
+            })
+        except Exception as e:
+            safe_print(f"    ⚠️  Failed to read file {file_path.name}: {e}")
+            continue
+
+    if not posts_with_timestamp:
+        safe_print("⚠️  Could not read any posts")
+        return []
+
+    # Filter out the target post itself
+    if target_post_id is not None:
+        posts_with_timestamp = [
+            post for post in posts_with_timestamp
+            if post["post_id"] != target_post_id
+        ]
+
+    # When a target timestamp is given, keep only posts published before it
+    if target_timestamp is not None:
+        posts_with_timestamp = [
+            post for post in posts_with_timestamp
+            if post["timestamp"] < target_timestamp
+        ]
+
+    if not posts_with_timestamp:
+        return []
+
+    # Sort by publish_timestamp, newest first
+    posts_with_timestamp.sort(key=lambda x: x["timestamp"], reverse=True)
+
+    # Take the most recent N posts (slicing already handles short lists)
+    selected_posts = posts_with_timestamp[:max_count]
+
+    historical_posts = []
+    for post_info in selected_posts:
+        post_data = post_info["post_data"]
+
+        # Convert to the format the workflow expects
+        historical_post = {
+            "text": {
+                "title": post_data.get("title", ""),
+                "body": post_data.get("body_text", ""),
+                "hashtags": ""
+            },
+            "images": post_data.get("images", [])
+        }
+
+        historical_posts.append(historical_post)
+
+    return historical_posts
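+
+# Expected post JSON shape (a sketch inferred from the .get() calls in this
+# script; real files may carry additional fields):
+#   {"channel_content_id": "...", "publish_timestamp": 1700000000,
+#    "publish_time": "...", "title": "...", "body_text": "...",
+#    "images": [...], "comments": [...], "like_count": 0,
+#    "comment_count": 0, "channel_account_name": "...",
+#    "channel_account_id": "..."}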
+
+
+def load_post_files(directory, max_count=20):
+    """
+    Load all JSON files under the author's post-history directory, sort them by publish_timestamp from newest to oldest, and take the newest max_count.
+
+    Args:
+        directory: Post directory
+        max_count: Maximum number of posts to load (default 20)
+
+    Returns:
+        list: Post file paths (newest first)
+    """
+    post_dir = Path(directory)
+
+    if not post_dir.exists():
+        raise FileNotFoundError(f"Directory does not exist: {post_dir}")
+
+    # Collect all JSON files
+    json_files = list(post_dir.glob("*.json"))
+
+    if not json_files:
+        raise FileNotFoundError(f"No JSON files found in directory: {post_dir}")
+
+    # Read every post and extract its publish_timestamp
+    posts_with_timestamp = []
+    for file_path in json_files:
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                post_data = json.load(f)
+
+            # Fall back to 0 when publish_timestamp is missing
+            timestamp = post_data.get("publish_timestamp", 0)
+
+            posts_with_timestamp.append({
+                "file_path": file_path,
+                "timestamp": timestamp,
+                "post_data": post_data
+            })
+        except Exception as e:
+            print(f"⚠️  Failed to read file {file_path.name}: {e}")
+            continue
+
+    if not posts_with_timestamp:
+        raise FileNotFoundError("Could not read any posts")
+
+    # Sort by publish_timestamp, newest first
+    posts_with_timestamp.sort(key=lambda x: x["timestamp"], reverse=True)
+
+    # Take the newest max_count posts
+    selected_posts = posts_with_timestamp[:max_count]
+
+    print(f"📊 Sorted by time; selected the {len(selected_posts)} newest posts:")
+    for idx, post_info in enumerate(selected_posts, 1):
+        post_data = post_info["post_data"]
+        publish_time = post_data.get("publish_time", "unknown time")
+        title = post_data.get("title", "untitled")
+        print(f"   {idx}. {post_info['file_path'].name}")
+        print(f"      Title: {title}")
+        print(f"      Published: {publish_time}")
+
+    return [post_info["file_path"] for post_info in selected_posts]
+
+
+def convert_to_workflow_input(raw_data, historical_posts=None):
+    """
+    Convert raw post data into the workflow input format.
+
+    Args:
+        raw_data: Raw post data
+        historical_posts: List of historical posts (optional)
+    """
+    # Convert to the format the workflow expects
+    images = raw_data.get("images", [])
+
+    input_data = {
+        "multimedia_content": {
+            "images": images,
+            "video": raw_data.get("video", {}),
+            "text": {
+                "title": raw_data.get("title", ""),
+                "body": raw_data.get("body_text", ""),
+                "hashtags": ""
+            }
+        },
+        "comments": raw_data.get("comments", []),  # Include comment data
+        "creator_info": {
+            "nickname": raw_data.get("channel_account_name", ""),
+            "account_id": raw_data.get("channel_account_id", "")
+        }
+    }
+
+    # Attach historical posts to the input data when available
+    if historical_posts:
+        input_data["historical_posts"] = historical_posts
+
+    return input_data
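+
+# Example call (hypothetical data; shapes inferred from the accessors above):
+#   raw = {"title": "t", "body_text": "b", "channel_account_name": "nick"}
+#   convert_to_workflow_input(raw)["multimedia_content"]["text"]["body"]  # -> "b"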
+
+
+def process_single_post(post_file, posts_dir, output_dir, counter, total_count, current_index):
+    """Process a single post file (thread-safe version, with historical posts)."""
+    post_name = post_file.stem  # File name without extension
+    thread_id = threading.current_thread().name
+
+    safe_print(f"\n{'='*80}")
+    safe_print(f"[Thread:{thread_id}] Processing post: {post_name}")
+    safe_print(f"Progress: [{current_index}/{total_count}]")
+    safe_print(f"{'='*80}")
+
+    # 1. Load the post data
+    safe_print(f"\n[Thread:{thread_id}][1] Loading post data...")
+    try:
+        with open(post_file, "r", encoding="utf-8") as f:
+            raw_data = json.load(f)
+
+        target_timestamp = raw_data.get('publish_timestamp')
+        target_post_id = raw_data.get('channel_content_id')
+
+        safe_print(f"✅ [{thread_id}] Post data loaded successfully")
+        safe_print(f"   - Title: {raw_data.get('title', 'N/A')}")
+        safe_print(f"   - Post ID: {target_post_id}")
+        safe_print(f"   - Published: {raw_data.get('publish_time', 'unknown time')}")
+        safe_print(f"   - Images: {len(raw_data.get('images', []))}")
+        safe_print(f"   - Likes: {raw_data.get('like_count', 0)}")
+        safe_print(f"   - Comments: {raw_data.get('comment_count', 0)}")
+    except Exception as e:
+        safe_print(f"❌ [{thread_id}] Failed to load post data: {e}")
+        counter.increment_fail()
+        return False
+
+    # 2. Load historical posts
+    safe_print(f"\n[Thread:{thread_id}][2] Loading historical posts...")
+    historical_posts = load_historical_posts(
+        posts_dir,
+        target_timestamp=target_timestamp,
+        target_post_id=target_post_id,
+        max_count=15
+    )
+
+    if historical_posts:
+        safe_print(f"✅ [{thread_id}] Loaded {len(historical_posts)} historical posts")
+    else:
+        safe_print(f"⚠️  [{thread_id}] No historical posts loaded; falling back to regular analysis mode")
+
+    # 3. Convert the data format
+    safe_print(f"\n[Thread:{thread_id}][3] Converting data format...")
+    try:
+        input_data = convert_to_workflow_input(raw_data, historical_posts)
+        safe_print(f"✅ [{thread_id}] Data format converted")
+        safe_print(f"   - Historical posts: {len(input_data.get('historical_posts', []))}")
+    except Exception as e:
+        safe_print(f"❌ [{thread_id}] Data format conversion failed: {e}")
+        counter.increment_fail()
+        return False
+
+    # 4. Initialize the workflow (each thread creates its own instance for thread safety)
+    safe_print(f"\n[Thread:{thread_id}][4] Initializing workflow...")
+    try:
+        workflow = WhatDeconstructionWorkflow(
+            model_provider="google_genai",
+            max_depth=10
+        )
+        safe_print(f"✅ [{thread_id}] Workflow initialized")
+    except Exception as e:
+        safe_print(f"❌ [{thread_id}] Workflow initialization failed: {e}")
+        counter.increment_fail()
+        return False
+
+    # 5. Run the workflow
+    safe_print(f"\n[Thread:{thread_id}][5] Running workflow...")
+    safe_print(f"   Note: this may take a few minutes...")
+    try:
+        result = workflow.invoke(input_data)
+        safe_print(f"✅ [{thread_id}] Workflow finished")
+    except Exception as e:
+        safe_print(f"❌ [{thread_id}] Workflow execution failed: {e}")
+        import traceback
+        safe_print(traceback.format_exc())
+        counter.increment_fail()
+        return False
+
+    # 6. Save the result
+    safe_print(f"\n[Thread:{thread_id}][6] Saving result...")
+    try:
+        # Use the post file name as the prefix
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        output_filename = f"{post_name}_with_history_{timestamp}.json"
+        output_path = output_dir / output_filename
+
+        with open(output_path, "w", encoding="utf-8") as f:
+            json.dump(result, f, ensure_ascii=False, indent=2)
+
+        safe_print(f"✅ [{thread_id}] Result saved to: {output_path}")
+    except Exception as e:
+        safe_print(f"❌ [{thread_id}] Failed to save result: {e}")
+        counter.increment_fail()
+        return False
+
+    # 7. Print a result summary
+    safe_print(f"\n{'='*80}")
+    safe_print(f"Result summary - {post_name} [Thread:{thread_id}]")
+    safe_print(f"{'='*80}")
+
+    if result:
+        three_points = result.get("三点解构", {})
+        inspiration_data = three_points.get("灵感点", {})
+        keypoints_data = three_points.get("关键点", {})
+        comments = result.get("评论分析", {}).get("解构维度", [])
+
+        safe_print(f"\nThree-point deconstruction (三点解构):")
+        safe_print(f"  - Inspiration points: {inspiration_data.get('total_count', 0)}")
+        safe_print(f"  - Inspiration analysis mode: {inspiration_data.get('analysis_mode', 'unknown')}")
+        safe_print(f"  - Purpose points: 1")
+        safe_print(f"  - Key points: {keypoints_data.get('total_count', 0)}")
+
+        safe_print(f"\nComment analysis (评论分析):")
+        safe_print(f"  - Deconstruction dimensions: {len(comments)}")
+
+        topic_understanding = result.get("选题理解", {})
+        if topic_understanding:
+            topic_theme = topic_understanding.get("topic_theme", "")
+            safe_print(f"\nTopic understanding (选题理解):")
+            safe_print(f"  - Topic theme: {topic_theme}")
+
+        counter.increment_success()
+        return True
+
+    # An empty result was written out but contains nothing useful; count it as a failure.
+    safe_print(f"⚠️  [{thread_id}] Workflow returned an empty result")
+    counter.increment_fail()
+    return False
+
+
+def main():
+    """Main entry point (concurrent version, with historical posts)."""
+    # Parse command-line arguments
+    parser = argparse.ArgumentParser(description='Batch "What" deconstruction workflow for posts')
+    parser.add_argument('directory', type=str, help='Post directory name (e.g. "阿里多多酱" or "G88818")')
+    args = parser.parse_args()
+
+    directory = args.directory
+
+    print("=" * 80)
+    print(f"Starting batch processing of the author's post history (concurrent mode, with historical posts) - directory: {directory}")
+    print("=" * 80)
+
+    # Configuration
+    posts_dir = Path(__file__).parent / directory / "作者历史帖子"
+    output_dir = Path(__file__).parent / directory / "output"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Concurrency: cap the worker count (tune to CPU cores and API rate limits)
+    MAX_WORKERS = 4  # Adjust as needed; keeping it at 5 or below is recommended
+
+    # Limit on the number of posts to process
+    MAX_POSTS = 20  # Only process the 20 most recent posts
+
+    # 1. Load the post files (sorted newest first, newest 20 selected)
+    print(f"\n[1] Scanning post files...")
+    try:
+        post_files = load_post_files(posts_dir, max_count=MAX_POSTS)
+        print(f"✅ Selected {len(post_files)} most recent posts for processing")
+    except Exception as e:
+        print(f"❌ Failed to scan post files: {e}")
+        return
+
+    # 2. Initialize the thread-safe counter
+    print(f"\n[2] Initializing concurrent processing...")
+    print(f"   - Max worker threads: {MAX_WORKERS}")
+    counter = ThreadSafeCounter()
+
+    # 3. Process all posts concurrently with a thread pool
+    print(f"\n[3] Starting concurrent processing...")
+    print(f"   Note: multiple threads process different posts at once; each post loads its own historical posts")
+    print("=" * 80)
+
+    start_time = datetime.now()
+
+    # Run the work through a ThreadPoolExecutor
+    with ThreadPoolExecutor(max_workers=MAX_WORKERS, thread_name_prefix="Worker") as executor:
+        # Submit all tasks
+        future_to_post = {
+            executor.submit(
+                process_single_post,
+                post_file,
+                posts_dir,  # Post directory, used to load historical posts
+                output_dir,
+                counter,
+                len(post_files),
+                i
+            ): (post_file, i)
+            for i, post_file in enumerate(post_files, 1)
+        }
+
+        # Wait for all tasks to complete
+        for future in as_completed(future_to_post):
+            post_file, index = future_to_post[future]
+            try:
+                result = future.result()
+                if not result:
+                    safe_print(f"⚠️  Failed to process post: {post_file.name}")
+            except Exception as e:
+                safe_print(f"❌ Exception while processing post: {post_file.name}")
+                safe_print(f"   Error: {e}")
+                import traceback
+                safe_print(traceback.format_exc())
+                counter.increment_fail()
+
+    end_time = datetime.now()
+    duration = (end_time - start_time).total_seconds()
+
+    # 4. Summary
+    print("\n" + "=" * 80)
+    print("Batch processing complete")
+    print("=" * 80)
+    print(f"\nTotal: {len(post_files)} posts")
+    print(f"Succeeded: {counter.success}")
+    print(f"Failed: {counter.fail}")
+    print(f"Elapsed: {duration:.2f} seconds")
+    print(f"Average per post: {duration/len(post_files):.2f} seconds")
+    print(f"\nOutput directory: {output_dir}")
+    print("=" * 80)
+
+
+if __name__ == "__main__":
+    main()