|
@@ -249,144 +249,12 @@ class ContentIdentifier:
|
|
|
except Exception as e:
|
|
|
self.logger.error(f"处理记录失败: {e}")
|
|
|
return False
|
|
|
-
|
|
|
- def process_all_records(self, max_records: int = 10):
|
|
|
- """处理多条记录"""
|
|
|
- self.logger.info(f"开始批量处理,最多处理 {max_records} 条记录")
|
|
|
-
|
|
|
- processed_count = 0
|
|
|
- success_count = 0
|
|
|
-
|
|
|
- for i in range(max_records):
|
|
|
- self.logger.info(f"\n--- 处理第 {i+1}/{max_records} 条记录 ---")
|
|
|
-
|
|
|
- if self.process_single_record():
|
|
|
- success_count += 1
|
|
|
- else:
|
|
|
- self.logger.warning("没有更多记录需要处理,结束批量处理")
|
|
|
- break
|
|
|
-
|
|
|
- processed_count += 1
|
|
|
-
|
|
|
- # 添加延迟避免API限制
|
|
|
- time.sleep(2)
|
|
|
-
|
|
|
- self.logger.info(f"\n批量处理完成!总共处理 {processed_count} 条记录,成功 {success_count} 条")
|
|
|
-
|
|
|
- def process_continuous(self, max_records: int = None, delay_seconds: int = 2):
|
|
|
- """连续处理记录,直到没有更多记录或达到最大数量限制"""
|
|
|
- self.logger.info("启动连续处理模式...")
|
|
|
- self.logger.info("系统将自动处理数据库中的记录,一条完成后自动处理下一条")
|
|
|
- self.logger.info(f"处理间隔: {delay_seconds} 秒")
|
|
|
- if max_records:
|
|
|
- self.logger.info(f"最大处理数量: {max_records} 条")
|
|
|
- else:
|
|
|
- self.logger.info("无数量限制,将处理所有可用记录")
|
|
|
- self.logger.info("按 Ctrl+C 可以随时停止处理")
|
|
|
- self.logger.info("-" * 60)
|
|
|
-
|
|
|
- processed_count = 0
|
|
|
- success_count = 0
|
|
|
- consecutive_failures = 0
|
|
|
- max_consecutive_failures = 3 # 连续失败3次后停止
|
|
|
-
|
|
|
- try:
|
|
|
- while True:
|
|
|
- # 检查是否达到最大数量限制
|
|
|
- if max_records and processed_count >= max_records:
|
|
|
- self.logger.info(f"\n已达到最大处理数量限制 ({max_records} 条),停止处理")
|
|
|
- break
|
|
|
-
|
|
|
- self.logger.info(f"\n--- 处理第 {processed_count + 1} 条记录 ---")
|
|
|
- self.logger.info(f"时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
|
|
-
|
|
|
- # 处理单条记录
|
|
|
- if self.process_single_record():
|
|
|
- success_count += 1
|
|
|
- consecutive_failures = 0 # 重置连续失败计数
|
|
|
- self.logger.info(f"✅ 记录处理成功 (成功: {success_count}, 失败: {processed_count - success_count + 1})")
|
|
|
- else:
|
|
|
- consecutive_failures += 1
|
|
|
- self.logger.warning(f"❌ 记录处理失败 (成功: {success_count}, 失败: {processed_count - success_count + 1})")
|
|
|
-
|
|
|
- # 检查连续失败次数
|
|
|
- if consecutive_failures >= max_consecutive_failures:
|
|
|
- self.logger.warning(f"\n⚠️ 连续失败 {max_consecutive_failures} 次,可能没有更多记录需要处理")
|
|
|
- self.logger.info("停止连续处理")
|
|
|
- break
|
|
|
-
|
|
|
- processed_count += 1
|
|
|
-
|
|
|
- # 检查是否还有更多记录
|
|
|
- remaining_records = self.get_remaining_records_count()
|
|
|
- if remaining_records == 0:
|
|
|
- self.logger.info(f"\n🎉 所有记录已处理完成!总共处理 {processed_count} 条记录")
|
|
|
- break
|
|
|
-
|
|
|
- self.logger.info(f"剩余待处理记录: {remaining_records} 条")
|
|
|
-
|
|
|
- # 添加延迟避免API限制
|
|
|
- if delay_seconds > 0:
|
|
|
- self.logger.info(f"等待 {delay_seconds} 秒后处理下一条记录...")
|
|
|
- time.sleep(delay_seconds)
|
|
|
-
|
|
|
- except KeyboardInterrupt:
|
|
|
- self.logger.info(f"\n\n⏹️ 用户中断处理")
|
|
|
- self.logger.info(f"已处理 {processed_count} 条记录,成功 {success_count} 条")
|
|
|
- except Exception as e:
|
|
|
- self.logger.error(f"\n\n💥 处理过程中发生错误: {e}")
|
|
|
- self.logger.info(f"已处理 {processed_count} 条记录,成功 {success_count} 条")
|
|
|
-
|
|
|
- self.logger.info(f"\n📊 连续处理完成!")
|
|
|
- self.logger.info(f"总处理数量: {processed_count}")
|
|
|
- self.logger.info(f"成功数量: {success_count}")
|
|
|
- self.logger.info(f"失败数量: {processed_count - success_count}")
|
|
|
- if processed_count > 0:
|
|
|
- success_rate = (success_count / processed_count) * 100
|
|
|
- self.logger.info(f"成功率: {success_rate:.1f}%")
|
|
|
-
|
|
|
- def get_remaining_records_count(self) -> int:
|
|
|
- """获取剩余待处理记录数量"""
|
|
|
- try:
|
|
|
- sql = "SELECT COUNT(*) FROM knowledge_search_content WHERE recognition_status = 0"
|
|
|
- result = self.db.get_values(sql)
|
|
|
- if result and len(result) > 0:
|
|
|
- return result[0][0]
|
|
|
- return 0
|
|
|
- except Exception as e:
|
|
|
- self.logger.error(f"获取剩余记录数量失败: {e}")
|
|
|
- return 0
|
|
|
|
|
|
|
|
|
def main():
|
|
|
"""主函数"""
|
|
|
- parser = argparse.ArgumentParser(description='内容识别脚本 - 分析图片和视频内容')
|
|
|
- parser.add_argument('--single', action='store_true', help='只处理一条记录')
|
|
|
- parser.add_argument('--batch', type=int, default=10, help='批量处理记录数量,默认10条')
|
|
|
- parser.add_argument('--continuous', action='store_true', help='连续处理模式,自动处理所有可用记录')
|
|
|
- parser.add_argument('--max-records', type=int, help='连续处理模式下的最大处理数量限制')
|
|
|
- parser.add_argument('--delay', type=int, default=2, help='处理间隔时间(秒),默认2秒')
|
|
|
-
|
|
|
- args = parser.parse_args()
|
|
|
-
|
|
|
- try:
|
|
|
- # 创建ContentIdentifier实例
|
|
|
- identifier = ContentIdentifier()
|
|
|
-
|
|
|
- if args.single:
|
|
|
- # 处理单条记录
|
|
|
- identifier.process_single_record()
|
|
|
- elif args.continuous:
|
|
|
- # 连续处理模式
|
|
|
- identifier.process_continuous(args.max_records, args.delay)
|
|
|
- else:
|
|
|
- # 批量处理记录
|
|
|
- identifier.process_all_records(args.batch)
|
|
|
-
|
|
|
- except Exception as e:
|
|
|
- sys.stderr.write(f"程序执行失败: {e}\n")
|
|
|
- sys.exit(1)
|
|
|
-
|
|
|
+ identifier = ContentIdentifier()
|
|
|
+ identifier.process_single_record()
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
main()
|