|
@@ -17,64 +17,65 @@ class Handler:
|
|
|
def __init__(self):
|
|
|
|
|
|
# Initialize the Feishu client. NOTE(review): this comment looks stale - the visible lines create no Feishu client; they only set self.processor and self.system_prompt.
|
|
|
- self.mysql = MysqlHelper()
|
|
|
self.processor = GeminiProcessor()
|
|
|
self.system_prompt = File.read_file('prompt/handle.md')  # presumably the prompt text loaded from this relative path - TODO confirm what File.read_file returns
|
|
|
|
|
|
# print(self.system_prompt)
|
|
|
|
|
|
|
|
|
- def process_single_record(self, record) -> bool:
|
|
|
- """处理单条记录"""
|
|
|
- try:
|
|
|
- # 提取内容
|
|
|
- content = self.extract_content_from_record(record)
|
|
|
-
|
|
|
- # 检查是否有输入内容
|
|
|
- if not content.strip() :
|
|
|
- print(f"记录 {record.record_id} 没有输入内容,跳过")
|
|
|
- return True
|
|
|
-
|
|
|
- result = self.processor.process(content, self.system_prompt)
|
|
|
-
|
|
|
-
|
|
|
- # 更新飞书表格
|
|
|
- self.update_feishu_record(record.record_id, result)
|
|
|
-
|
|
|
-
|
|
|
- # 添加延迟避免API限制
|
|
|
- time.sleep(1)
|
|
|
-
|
|
|
- return True
|
|
|
-
|
|
|
- except Exception as e:
|
|
|
- print(f"处理记录 {record.record_id} 失败: {e}")
|
|
|
- return False
|
|
|
-
|
|
|
- def process_all_records(self):
|
|
|
+ def process_all_records(self, query_word, source_type, source_channel):
|
|
|
"""处理所有记录"""
|
|
|
- print(f"开始处理飞书表格 {self.table_id} 中的所有记录")
|
|
|
|
|
|
- page_token = None
|
|
|
total_processed = 0
|
|
|
total_success = 0
|
|
|
|
|
|
while True:
|
|
|
try:
|
|
|
- # 获取记录
|
|
|
- result = self.feishu.get_all_records(self.table_id, page_token)
|
|
|
-
|
|
|
- if not result.items:
|
|
|
- print("没有找到记录")
|
|
|
- break
|
|
|
+ # 查库 获取记录
|
|
|
+ sql = """
|
|
|
+ select id, formatted_content from knowledge_search_content
|
|
|
+ where formatted_content is not null and multimodal_recognition is null
|
|
|
+ """
|
|
|
|
|
|
- print(f"获取到 {len(result.items)} 条记录")
|
|
|
+ # 添加条件(当参数有值时)
|
|
|
+ conditions = []
|
|
|
+ if query_word is not None:
|
|
|
+ conditions.append(f"query_word='{query_word}'")
|
|
|
+ if source_type is not None:
|
|
|
+ conditions.append(f"source_type='{source_type}'")
|
|
|
+ if source_channel is not None:
|
|
|
+ conditions.append(f"source_channel='{source_channel}'")
|
|
|
+
|
|
|
+ # 如果有条件,添加到SQL中
|
|
|
+ if conditions:
|
|
|
+ sql += " and " + " and ".join(conditions)
|
|
|
+
|
|
|
+ records = MysqlHelper.get_values(sql)
|
|
|
+ print(f"获取到 {len(result)} 条记录")
|
|
|
|
|
|
+
|
|
|
# 处理每条记录
|
|
|
- for record in result.items:
|
|
|
+ for row in records:
|
|
|
total_processed += 1
|
|
|
- if self.process_single_record(record):
|
|
|
+ """处理单条记录"""
|
|
|
+ try:
|
|
|
+
|
|
|
+ result = self.processor.process(row[1], self.system_prompt)
|
|
|
+
|
|
|
+
|
|
|
+ # 更新数据库
|
|
|
+ update_sql = """
|
|
|
+ update knowledge_search_content set multimodal_recognition = %s where id = %s
|
|
|
+ """
|
|
|
+ MysqlHelper.update_values(update_sql, (result, row[0]))
|
|
|
+
|
|
|
+
|
|
|
+ # 添加延迟避免API限制
|
|
|
+ time.sleep(1)
|
|
|
total_success += 1
|
|
|
+ except Exception as e:
|
|
|
+ print(f"处理记录 {record.record_id} 失败: {e}")
|
|
|
+
|
|
|
|
|
|
# 检查是否有下一页
|
|
|
if not result.has_more:
|
|
@@ -94,17 +95,21 @@ def main():
|
|
|
"""主函数"""
|
|
|
# 创建命令行参数解析器
|
|
|
parser = argparse.ArgumentParser(description='内容识别脚本')
|
|
|
- parser.add_argument('--query_word', nargs='?', help='query词')
|
|
|
- parser.add_argument('--source_type', nargs='?', help='数据源类型')
|
|
|
- parser.add_argument('--source_channel', nargs='?', help='数据源渠道')
|
|
|
+ parser.add_argument('--query_word', default=None, help='query词')
|
|
|
+ parser.add_argument('--source_type', default=None, help='数据源类型')
|
|
|
+ parser.add_argument('--source_channel', default=None, help='数据源渠道')
|
|
|
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
try:
|
|
|
# 创建内容识别器实例
|
|
|
- hadnler = Handler()
|
|
|
+ handler = Handler()
|
|
|
|
|
|
- hadnler.process_all_records(query_word=args.query_word, source_type=source_type, source_channel=source_channel)
|
|
|
+ handler.process_all_records(
|
|
|
+ query_word=args.query_word,
|
|
|
+ source_type=args.source_type,
|
|
|
+ source_channel=args.source_channel
|
|
|
+ )
|
|
|
|
|
|
except Exception as e:
|
|
|
print(f"程序执行失败: {e}")
|