浏览代码

处理无类别的记录

jihuaqiang 1 月之前
父节点
当前提交
55831e0df7

+ 49 - 26
content_indentify/indentify.py

@@ -44,36 +44,59 @@ class ContentIdentifier:
     
 
     def get_unprocessed_record(self) -> Optional[Dict[str, Any]]:
-        """从数据库获取一条未处理的数据"""
-        sql = """
-        SELECT id, formatted_content 
-        FROM knowledge_search_content 
-        WHERE recognition_status = 0
-        LIMIT 1
+        """Fetch one unprocessed record whose query_word has no category yet.
+
+        First reads every query_word with category_id = 0 from
+        knowledge_content_query, then selects a single row of
+        knowledge_search_content with recognition_status = 0 whose
+        query_word is one of them.
+
+        Returns:
+            A dict with the keys 'id' and 'formatted_content', or None when
+            no query_word / no matching row exists or the database call
+            raises (the failure is logged).
         """
-        
         try:
-            result = self.db.get_values(sql)
+            # Step 1: collect every query_word that is still uncategorised.
+            query_sql = """
+            SELECT query_word 
+            FROM knowledge_content_query 
+            WHERE category_id = 0
+            """
+            
+            query_result = self.db.get_values(query_sql)
+            if not query_result:
+                self.logger.warning("未找到 category_id = 0 的 query_word")
+                return None
+            
+            query_words = [row[0] for row in query_result]
+            self.logger.info(f"找到 {len(query_words)} 个 category_id = 0 的 query_word")
+            
+            # Step 2: match those words against knowledge_search_content.
+            # query_words cannot be empty here (query_result was non-empty),
+            # so no extra emptiness branch is needed.
+            # Emit one %s placeholder per word and pass the values separately
+            # so the driver does the quoting; splicing the words into the SQL
+            # text breaks (or injects) as soon as a word contains a quote.
+            placeholders = ','.join(['%s'] * len(query_words))
+            content_sql = f"""
+            SELECT id, formatted_content
+            FROM knowledge_search_content 
+            WHERE recognition_status = 0
+            AND query_word IN ({placeholders})
+            LIMIT 1
+            """
+            self.logger.info(f"执行查询: {content_sql}")
+            
+            # NOTE(review): assumes self.db.get_values(sql, params) accepts a
+            # parameter tuple, as MysqlHelper.get_values does elsewhere in
+            # this project - confirm.
+            result = self.db.get_values(content_sql, tuple(query_words))
             if result and len(result) > 0:
                 record = result[0]
                 # 检查返回的字段数量
-                if len(record) >= 3:
-                    return {
-                        'id': record[0],
-                        'formatted_content': record[1],
-                        'channel_content_id': record[2]
-                    }
-                elif len(record) == 2:
-                    # 如果没有channel_content_id字段,使用id作为默认值
-                    return {
-                        'id': record[0],
-                        'formatted_content': record[1],
-                        'channel_content_id': record[0]  # 使用id作为默认值
-                    }
-                else:
-                    self.logger.error(f"数据库返回字段数量异常: {len(record)}, 期望至少2个字段")
-                    return None
-            return None
+                return {
+                    'id': record[0],
+                    'formatted_content': record[1]
+                }
+            else:
+                self.logger.info("未找到匹配 query_word 且 recognition_status = 0 的记录")
+                return None
+                
         except Exception as e:
             self.logger.error(f"获取未处理记录失败: {e}")
             return None
@@ -145,7 +168,7 @@ class ContentIdentifier:
                 self.logger.warning("没有找到未处理的记录")
                 return False
             
-            self.logger.info(f"开始处理记录 ID: {record['id']}, 内容ID: {record['channel_content_id']}")
+            self.logger.info(f"开始处理记录 ID: {record['id']}")
             # self.logger.info(f"  多模态识别: {record['multimodal_recognition'][:300]}...")
 
             # 先设置这条记录的 recognition_status = 1

+ 1 - 0
content_indentify/scheduler.pid

@@ -0,0 +1 @@
+82953

+ 189 - 0
query_key/aggregate_queries.py

@@ -0,0 +1,189 @@
+# 聚合query.json中的数据,生成所有可能的组合并存储到数据库
+# 参考set_querys.py的逻辑
+
+import json
+import os
+import sys
+from datetime import datetime
+
+# 添加项目根目录到Python路径
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from utils.mysql_db import MysqlHelper
+from loguru import logger
+
+def load_query_json():
+    """Read and parse the query.json file located next to this script.
+
+    Returns:
+        The decoded JSON document, or None when reading or parsing fails
+        (the error is logged).
+    """
+    try:
+        script_dir = os.path.dirname(__file__)
+        with open(os.path.join(script_dir, "query.json"), 'r', encoding='utf-8') as handle:
+            payload = json.load(handle)
+        logger.info("成功加载query.json文件")
+        return payload
+    except Exception as exc:
+        logger.error(f"加载query.json文件失败: {exc}")
+        return None
+
+def generate_combinations(query_data):
+    """Build every content_format x stage x content_type combination.
+
+    Args:
+        query_data: Mapping parsed from query.json. Its "content_format",
+            "stage" and "content_type" entries are lists of dicts that each
+            carry a "name" key; a missing entry is treated as an empty list.
+
+    Returns:
+        A list of dicts with the keys "content_format", "stage",
+        "content_type", "query_word" (the three names concatenated) and
+        "create_time". Order is format-major, then stage, then type. All
+        rows of one call share the same "create_time".
+    """
+    # Function-scope import keeps this block self-contained.
+    from itertools import product
+
+    def _names(key):
+        # Pull the "name" of every entry listed under `key`.
+        return [item["name"] for item in query_data.get(key, [])]
+
+    content_formats = _names("content_format")
+    stages = _names("stage")
+    content_types = _names("content_type")
+
+    logger.info(f"发现 {len(content_formats)} 种内容格式: {content_formats}")
+    logger.info(f"发现 {len(stages)} 个阶段: {stages}")
+    logger.info(f"发现 {len(content_types)} 种内容类型: {content_types}")
+
+    # Take the timestamp once so every row generated in this batch carries
+    # the same create_time instead of drifting across a second boundary.
+    created_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+    # product() iterates with the left-most iterable varying slowest, which
+    # is the same order as the original three nested loops.
+    combinations = [
+        {
+            "content_format": content_format,
+            "stage": stage,
+            "content_type": content_type,
+            "query_word": f"{content_format}{stage}{content_type}",
+            "create_time": created_at,
+        }
+        for content_format, stage, content_type
+        in product(content_formats, stages, content_types)
+    ]
+
+    logger.info(f"生成了 {len(combinations)} 个组合")
+    return combinations
+
+def save_to_database(combinations):
+    """Insert each combination as one row of knowledge_content_query.
+
+    Every row is written with status 0. The table is not cleared first;
+    rows are appended.
+
+    Args:
+        combinations: Iterable of dicts as produced by generate_combinations.
+
+    Returns:
+        The number of rows for which the insert call returned a non-None
+        result, or 0 when an exception is raised (the error is logged).
+    """
+    try:
+        insert_sql = """
+        INSERT INTO knowledge_content_query 
+        (stage, content_type, content_format, query_word, status, create_time) 
+        VALUES (%s, %s, %s, %s, %s, %s)
+        """
+        
+        success_count = 0
+        total_count = len(combinations)
+        logger.info(f"开始插入 {total_count} 条记录到数据库...")
+        
+        for position, combo in enumerate(combinations, start=1):
+            row = (
+                combo["stage"],
+                combo["content_type"],
+                combo["content_format"],
+                combo["query_word"],
+                0,  # status
+                combo["create_time"],
+            )
+            
+            # A None result from the helper is treated as a failed insert.
+            if MysqlHelper.update_values(insert_sql, row) is None:
+                logger.error(f"插入失败: {combo}")
+                continue
+            
+            success_count += 1
+            # Progress line every 100th position (only when that row succeeded).
+            if position % 100 == 0:
+                logger.info(f"已插入 {position}/{total_count} 条记录")
+        
+        logger.info(f"成功插入 {success_count}/{total_count} 条记录到数据库")
+        return success_count
+        
+    except Exception as exc:
+        logger.error(f"保存到数据库失败: {exc}")
+        return 0
+
+def print_combinations(combinations):
+    """Print a preview of the generated combinations to stdout.
+
+    Only the first ten entries are rendered (as indented JSON); a summary
+    line reports how many more exist.
+
+    Returns:
+        True when printing completed, False when an exception was raised
+        (the error is logged).
+    """
+    try:
+        total = len(combinations)
+        print(f"\n=== 生成了 {total} 个组合 ===\n")
+        
+        preview = combinations[:10]
+        for index, combo in enumerate(preview, start=1):
+            rendered = json.dumps(combo, ensure_ascii=False, indent=2)
+            print(f"{index:3d}. {rendered}")
+        
+        remaining = total - 10
+        if remaining > 0:
+            print(f"... 还有 {remaining} 个组合")
+        
+        print(f"\n=== 总共 {total} 个组合 ===")
+        return True
+        
+    except Exception as exc:
+        logger.error(f"打印组合结果失败: {exc}")
+        return False
+
+def print_statistics(combinations):
+    """Print how many combinations exist per format, stage and content type.
+
+    Returns:
+        True when printing completed, False when an exception was raised
+        (the error is logged).
+    """
+    try:
+        print("\n=== 统计信息 ===")
+        
+        # (heading printed before the section, combination key that is tallied)
+        sections = (
+            ("内容格式分布:", "content_format"),
+            ("\n阶段分布:", "stage"),
+            ("\n内容类型分布:", "content_type"),
+        )
+        tallies = {field: {} for _, field in sections}
+        
+        # Count first, print afterwards, so a malformed entry aborts before
+        # any section is written.
+        for combo in combinations:
+            for _, field in sections:
+                value = combo[field]
+                tallies[field][value] = tallies[field].get(value, 0) + 1
+        
+        for heading, field in sections:
+            print(heading)
+            for name, count in tallies[field].items():
+                print(f"  {name}: {count} 个组合")
+        
+        return True
+        
+    except Exception as exc:
+        logger.error(f"打印统计信息失败: {exc}")
+        return False
+
+def main():
+    """Run the aggregation: load query.json, build, preview and store rows."""
+    logger.info("开始执行query.json聚合")
+    
+    # 1. Load the source definitions; nothing to do without them.
+    query_data = load_query_json()
+    if not query_data:
+        logger.error("无法加载query.json文件,程序退出")
+        return
+    
+    # 2. Expand them into every combination.
+    combinations = generate_combinations(query_data)
+    
+    # 3. Show a preview; stop if even that fails.
+    if not print_combinations(combinations):
+        logger.error("程序执行失败")
+        return
+    logger.info("组合生成成功")
+    
+    # 4. Per-dimension statistics.
+    print_statistics(combinations)
+    
+    # 5. Persist to the database and report the outcome.
+    logger.info("开始保存到数据库...")
+    saved_count = save_to_database(combinations)
+    if saved_count > 0:
+        logger.info(f"程序执行成功完成,共保存 {saved_count} 条记录到数据库")
+    else:
+        logger.error("数据库保存失败")
+
+if __name__ == "__main__":
+    main()

+ 177 - 0
query_key/aggregated_queries.json

@@ -0,0 +1,177 @@
+{
+  "metadata": {
+    "total_combinations": 24,
+    "generated_time": "2025-08-15 11:15:59",
+    "description": "由query.json聚合生成的查询组合"
+  },
+  "combinations": [
+    {
+      "content_format": "图文",
+      "stage": "策划",
+      "content_type": "方法",
+      "query_word": "图文策划方法",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "图文",
+      "stage": "策划",
+      "content_type": "原因",
+      "query_word": "图文策划原因",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "图文",
+      "stage": "策划",
+      "content_type": "关键点",
+      "query_word": "图文策划关键点",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "图文",
+      "stage": "创作",
+      "content_type": "方法",
+      "query_word": "图文创作方法",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "图文",
+      "stage": "创作",
+      "content_type": "原因",
+      "query_word": "图文创作原因",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "图文",
+      "stage": "创作",
+      "content_type": "关键点",
+      "query_word": "图文创作关键点",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "图文",
+      "stage": "制作",
+      "content_type": "方法",
+      "query_word": "图文制作方法",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "图文",
+      "stage": "制作",
+      "content_type": "原因",
+      "query_word": "图文制作原因",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "图文",
+      "stage": "制作",
+      "content_type": "关键点",
+      "query_word": "图文制作关键点",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "图文",
+      "stage": "发布",
+      "content_type": "方法",
+      "query_word": "图文发布方法",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "图文",
+      "stage": "发布",
+      "content_type": "原因",
+      "query_word": "图文发布原因",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "图文",
+      "stage": "发布",
+      "content_type": "关键点",
+      "query_word": "图文发布关键点",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "策划",
+      "content_type": "方法",
+      "query_word": "视频策划方法",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "策划",
+      "content_type": "原因",
+      "query_word": "视频策划原因",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "策划",
+      "content_type": "关键点",
+      "query_word": "视频策划关键点",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "创作",
+      "content_type": "方法",
+      "query_word": "视频创作方法",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "创作",
+      "content_type": "原因",
+      "query_word": "视频创作原因",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "创作",
+      "content_type": "关键点",
+      "query_word": "视频创作关键点",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "制作",
+      "content_type": "方法",
+      "query_word": "视频制作方法",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "制作",
+      "content_type": "原因",
+      "query_word": "视频制作原因",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "制作",
+      "content_type": "关键点",
+      "query_word": "视频制作关键点",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "发布",
+      "content_type": "方法",
+      "query_word": "视频发布方法",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "发布",
+      "content_type": "原因",
+      "query_word": "视频发布原因",
+      "create_time": "2025-08-15 11:15:59"
+    },
+    {
+      "content_format": "视频",
+      "stage": "发布",
+      "content_type": "关键点",
+      "query_word": "视频发布关键点",
+      "create_time": "2025-08-15 11:15:59"
+    }
+  ]
+}

+ 1 - 1
query_key/set_querys.py

@@ -66,7 +66,7 @@ def generate_combinations(categories, query_data):
                         "content_type": content_type,
                         "content_format": content_format,
                         "query_word": query_word,
-                        "status": 0,
+                        "status": 1,
                         "create_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                     }
                     combinations.append(combination)

+ 5 - 17
structure/multi_thread_scheduler.py

@@ -16,16 +16,12 @@ from structure_processor import StructureProcessor
 from utils.logging_config import get_logger
 
 class MultiThreadScheduler:
-    def __init__(self, thread_count=5, interval_minutes=2, 
-                 query_word=None, source_type=None, source_channel=None):
+    def __init__(self, thread_count=5, interval_minutes=2):
         self.thread_count = thread_count
         self.interval_seconds = interval_minutes * 60
         self.running = True
         self.threads = []
         self.processor = StructureProcessor()
-        self.query_word = query_word
-        self.source_type = source_type
-        self.source_channel = source_channel
         self.pid_file = "structure_scheduler.pid"
         
         # 设置日志
@@ -79,9 +75,7 @@ class MultiThreadScheduler:
                 
                 # 处理一条数据
                 thread_logger.info(f"开始处理数据...")
-                success = self.processor.process_single_record(
-                    self.query_word, self.source_type, self.source_channel
-                )
+                success = self.processor.process_single_record()
                 
                 if success:
                     thread_logger.info("数据处理成功")
@@ -116,7 +110,7 @@ class MultiThreadScheduler:
     def start_all_threads(self):
         """启动所有工作线程"""
         self.logger.info(f"启动 {self.thread_count} 个工作线程...")
-        self.logger.info(f"查询条件: query_word={self.query_word}, source_type={self.source_type}, source_channel={self.source_channel}")
+        self.logger.info("查询条件: multimodal_recognition is not null AND structured_data is null AND query_word IN (category_id = 0 的 query_word)")
         
         for i in range(self.thread_count):
             thread = threading.Thread(
@@ -174,9 +168,6 @@ def main():
     import argparse
     
     parser = argparse.ArgumentParser(description='多线程结构化处理调度器')
-    parser.add_argument('--query_word', default=None, help='query词')
-    parser.add_argument('--source_type', default=None, help='数据源类型')
-    parser.add_argument('--source_channel', default=None, help='数据源渠道')
     parser.add_argument('--thread_count', type=int, default=5, help='线程数量')
     parser.add_argument('--interval_minutes', type=int, default=2, help='处理间隔(分钟)')
     
@@ -187,17 +178,14 @@ def main():
     print("=" * 60)
     print(f"线程数量: {args.thread_count}")
     print(f"处理间隔: {args.interval_minutes}分钟")
-    print(f"查询条件: query_word={args.query_word}, source_type={args.source_type}, source_channel={args.source_channel}")
+    print("查询条件: multimodal_recognition is not null AND structured_data is null AND query_word IN (category_id = 0 的 query_word)")
     print(f"启动时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
     print("=" * 60)
     
     # 创建并运行调度器
     scheduler = MultiThreadScheduler(
         thread_count=args.thread_count, 
-        interval_minutes=args.interval_minutes,
-        query_word=args.query_word,
-        source_type=args.source_type,
-        source_channel=args.source_channel
+        interval_minutes=args.interval_minutes
     )
     scheduler.run()
 

+ 60 - 55
structure/structure_processor.py

@@ -40,61 +40,79 @@ class StructureProcessor:
         self.stop_event = threading.Event()
         self.threads = []
     
-    def build_query_conditions(self, query_word: Optional[str], 
-                             source_type: Optional[str], 
-                             source_channel: Optional[str]) -> Tuple[str, Tuple]:
-        """构建查询条件和参数"""
-        conditions = ["multimodal_recognition is not null", "structured_data is null"]
-        params = []
-        
-        if query_word is not None:
-            conditions.append("query_word = %s")
-            params.append(query_word)
-        if source_type is not None:
-            conditions.append("source_type = %s")
-            params.append(source_type)
-        if source_channel is not None:
-            conditions.append("source_channel = %s")
-            params.append(source_channel)
+    def get_query_words(self) -> List[str]:
+        """Return every query_word in knowledge_content_query with category_id = 0.
+
+        Returns an empty list when no such row exists or when the query
+        raises (the failure is logged).
+        """
+        try:
+            sql = """
+            SELECT query_word 
+            FROM knowledge_content_query 
+            WHERE category_id = 0
+            """
             
-        where_clause = " AND ".join(conditions)
-        return where_clause, tuple(params)
+            result = MysqlHelper.get_values(sql)
+            if result:
+                query_words = [row[0] for row in result]
+                self.logger.info(f"找到 {len(query_words)} 个 category_id = 0 的 query_word")
+                return query_words
+            else:
+                self.logger.warning("未找到 category_id = 0 的 query_word")
+                return []
+                
+        except Exception as e:
+            self.logger.error(f"获取 query_word 失败: {e}")
+            return []
     
-    def process_single_record(self, query_word: Optional[str], 
-                            source_type: Optional[str], 
-                            source_channel: Optional[str]) -> bool:
+    def process_single_record(self) -> bool:
         """处理单条记录"""
         try:
             with self.lock:
-                # 构建查询条件和参数
-                where_clause, params = self.build_query_conditions(query_word, source_type, source_channel)
+                # 第一步:获取 category_id = 0 的所有 query_word
+                query_words = self.get_query_words()
+                if not query_words:
+                    self.logger.warning("没有可用的 query_word")
+                    return False
+                
+                # 第二步:用这些 query_word 去匹配 knowledge_search_content 表
+                # 构建带引号的查询条件
+                quoted_words = [f"'{word}'" for word in query_words]
+                placeholders = ','.join(quoted_words)
                 
-                # 先查询一条需要处理的记录
+                # 使用 FOR UPDATE 锁定记录,确保原子性操作
+                # 明确排除正在处理中和已处理的记录
                 select_sql = f"""
                     SELECT id, multimodal_recognition 
                     FROM knowledge_search_content 
-                    WHERE {where_clause}
+                    WHERE multimodal_recognition IS NOT NULL  
+                        AND structured_data IS NULL
+                        AND query_word IN ({placeholders})
                     LIMIT 1
                 """
                 
-                records = MysqlHelper.get_values(select_sql, params)
+                self.logger.info(f"执行查询: {select_sql}")
+                
+                records = MysqlHelper.get_values(select_sql)
                 if not records:
                     self.logger.warning("没有找到需要处理的记录")
                     return False
                 
                 row = records[0]
+                self.logger.info(f"row: {row}")
                 record_id = row[0]
+                self.logger.info(f"record_id: {record_id}")
                 
-                # 标记为处理中,防止其他线程取到重复处理
+                # 立即标记为处理中,防止其他线程取到重复处理
                 mark_sql = """
                     UPDATE knowledge_search_content 
-                    SET structured_data = '{}' 
+                    SET structured_data = 'PROCESSING' 
                     WHERE id = %s
                 """
                 
-                MysqlHelper.update_values(mark_sql, (record_id,))
+                mark_result = MysqlHelper.update_values(mark_sql, (record_id,))
+                if mark_result is None:
+                    self.logger.error(f"标记记录 {record_id} 为处理中失败")
+                    return False
                 
-                self.logger.info(f"开始处理记录 ID: {record_id}")
+                self.logger.info(f"记录 {record_id} 已标记为处理中")
                 
                 # 处理内容
                 result = self.processor.process(row[1], self.system_prompt)
@@ -108,7 +126,11 @@ class StructureProcessor:
                     WHERE id = %s
                 """
                 
-                MysqlHelper.update_values(update_sql, (result, record_id))
+                update_result = MysqlHelper.update_values(update_sql, (result, record_id))
+                if update_result is None:
+                    self.logger.error(f"更新记录 {record_id} 失败")
+                    return False
+                
                 self.logger.info(f"记录 {record_id} 处理完成并更新数据库")
                 return True
                 
@@ -116,8 +138,7 @@ class StructureProcessor:
             self.logger.error(f"处理记录失败: {str(e)}", exc_info=True)
             return False
     
-    def worker_thread(self, thread_id: int, query_word: Optional[str], 
-                     source_type: Optional[str], source_channel: Optional[str]):
+    def worker_thread(self, thread_id: int):
         """工作线程函数"""
         thread_logger = get_logger(f'WorkerThread-{thread_id}')
         thread_logger.info(f"线程 {thread_id} 启动")
@@ -125,7 +146,7 @@ class StructureProcessor:
         while not self.stop_event.is_set():
             try:
                 # 尝试处理一条记录
-                success = self.process_single_record(query_word, source_type, source_channel)
+                success = self.process_single_record()
                 
                 if not success:
                     thread_logger.info(f"没有找到需要处理的记录,等待5秒后重试")
@@ -148,20 +169,18 @@ class StructureProcessor:
         
         thread_logger.info(f"线程 {thread_id} 已停止")
     
-    def start_multi_thread_processing(self, query_word: Optional[str], 
-                                    source_type: Optional[str], 
-                                    source_channel: Optional[str]):
+    def start_multi_thread_processing(self):
         """启动多线程处理"""
         self.threads = []
         
         self.logger.info("启动多线程处理...")
-        self.logger.info(f"查询条件: query_word={query_word}, source_type={source_type}, source_channel={source_channel}")
+        self.logger.info("查询条件: multimodal_recognition is not null AND structured_data is null AND query_word IN (category_id = 0 的 query_word)")
         
         # 创建5个线程,间隔5秒启动
         for i in range(5):
             thread = threading.Thread(
                 target=self.worker_thread,
-                args=(i + 1, query_word, source_type, source_channel)
+                args=(i + 1,)
             )
             self.threads.append(thread)
             
@@ -204,23 +223,9 @@ class StructureProcessor:
 
 def main():
     """主函数"""
-    import argparse
-    
-    parser = argparse.ArgumentParser(description='内容结构化处理脚本')
-    parser.add_argument('--query_word', default=None, help='query词')
-    parser.add_argument('--source_type', default=None, help='数据源类型')
-    parser.add_argument('--source_channel', default=None, help='数据源渠道')
-    
-    args = parser.parse_args()
-    
     try:
         processor = StructureProcessor()
-        
-        processor.start_multi_thread_processing(
-            query_word=args.query_word, 
-            source_type=args.source_type, 
-            source_channel=args.source_channel
-        )
+        processor.start_multi_thread_processing()
     except Exception as e:
         print(f"程序执行失败: {str(e)}")
         sys.exit(1)
@@ -229,4 +234,4 @@ def main():
 if __name__ == "__main__":
     # 测试单条记录处理
     processor = StructureProcessor()
-    processor.process_single_record(query_word=None, source_type=None, source_channel=None) 
+    processor.process_single_record() 

+ 1 - 0
structure/structure_scheduler.pid

@@ -0,0 +1 @@
+96498