丁云鹏 преди 1 седмица
родител
ревизия
bbc495cc1e
променени са 2 файла, в които са добавени 52 реда и са изтрити 47 реда
  1. 50 45
      3_handle.py
  2. 2 2
      utils/mysql_db.py

+ 50 - 45
3_handle.py

@@ -17,64 +17,65 @@ class Handler:
     def __init__(self):
         
         # 初始化飞书客户端
-        self.mysql = MysqlHelper()
         self.processor = GeminiProcessor()
         self.system_prompt = File.read_file('prompt/handle.md')
 
         # print(self.system_prompt)
         
     
-    def process_single_record(self, record) -> bool:
-        """处理单条记录"""
-        try:
-            # 提取内容
-            content = self.extract_content_from_record(record)
-
-            # 检查是否有输入内容
-            if not content.strip() :
-                print(f"记录 {record.record_id} 没有输入内容,跳过")
-                return True
-
-            result = self.processor.process(content, self.system_prompt)
-
-            
-            # 更新飞书表格
-            self.update_feishu_record(record.record_id, result)
-
-            
-            # 添加延迟避免API限制
-            time.sleep(1)
-            
-            return True
-            
-        except Exception as e:
-            print(f"处理记录 {record.record_id} 失败: {e}")
-            return False
-    
-    def process_all_records(self):
+    def process_all_records(self, query_word, source_type, source_channel):
         """处理所有记录"""
-        print(f"开始处理飞书表格 {self.table_id} 中的所有记录")
         
-        page_token = None
         total_processed = 0
         total_success = 0
         
         while True:
             try:
-                # 获取记录
-                result = self.feishu.get_all_records(self.table_id, page_token)
-                
-                if not result.items:
-                    print("没有找到记录")
-                    break
+                # 查库 获取记录
+                sql = """
+                select id, formatted_content from knowledge_search_content 
+                where formatted_content is not null and multimodal_recognition is null
+                """
                 
-                print(f"获取到 {len(result.items)} 条记录")
+                # 添加条件(当参数有值时)
+                conditions = []
+                if query_word is not None:
+                    conditions.append(f"query_word='{query_word}'")
+                if source_type is not None:
+                    conditions.append(f"source_type='{source_type}'")
+                if source_channel is not None:
+                    conditions.append(f"source_channel='{source_channel}'")
+
+                # 如果有条件,添加到SQL中
+                if conditions:
+                    sql += " and " + " and ".join(conditions)
+    
+                records = MysqlHelper.get_values(sql)    
+                print(f"获取到 {len(result)} 条记录")
                 
+
                 # 处理每条记录
-                for record in result.items:
+                for row in records:
                     total_processed += 1
-                    if self.process_single_record(record):
+                    """处理单条记录"""
+                    try:
+
+                        result = self.processor.process(row[1], self.system_prompt)
+
+                        
+                        # 更新数据库
+                        update_sql = """
+                        update knowledge_search_content set multimodal_recognition = %s where id = %s
+                        """
+                        MysqlHelper.update_values(update_sql, (result, row[0]))
+
+                        
+                        # 添加延迟避免API限制
+                        time.sleep(1)
                         total_success += 1
+                    except Exception as e:
+                        print(f"处理记录 {record.record_id} 失败: {e}")
+                        
                 
                 # 检查是否有下一页
                 if not result.has_more:
@@ -94,17 +95,21 @@ def main():
     """主函数"""
     # 创建命令行参数解析器
     parser = argparse.ArgumentParser(description='内容识别脚本')
-    parser.add_argument('--query_word', nargs='?', help='query词')
-    parser.add_argument('--source_type', nargs='?', help='数据源类型')
-    parser.add_argument('--source_channel', nargs='?', help='数据源渠道')
+    parser.add_argument('--query_word', default=None, help='query词')
+    parser.add_argument('--source_type', default=None, help='数据源类型')
+    parser.add_argument('--source_channel', default=None, help='数据源渠道')
     
     args = parser.parse_args()
     
     try:
         # 创建内容识别器实例
-        hadnler = Handler()
+        handler = Handler()
         
-        hadnler.process_all_records(query_word=args.query_word, source_type=source_type, source_channel=source_channel)
+        handler.process_all_records(
+            query_word=args.query_word, 
+            source_type=args.source_type, 
+            source_channel=args.source_channel
+        )
                 
     except Exception as e:
         print(f"程序执行失败: {e}")

+ 2 - 2
utils/mysql_db.py

@@ -45,14 +45,14 @@ class MysqlHelper:
             print(f"get_values异常:{e}\n")
 
     @classmethod
-    def update_values(cls, sql):
+    def update_values(cls, sql, params):
         # 连接数据库
         connect = cls.connect_mysql()
         # 返回一个 Cursor对象
         mysql = connect.cursor()
         try:
             # 执行 sql 语句
-            res = mysql.execute(sql)
+            res = mysql.execute(sql, params)
             # 注意 一定要commit,否则添加数据不生效
             connect.commit()
             return res