set_querys.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. # 读取数据库中的knowledge_content_category表,读出level为1的name和category_id,并将name和query.json中的content_type,step_type,idea_type中的每一项进行组合,输出组合出的结果,并写入到query.json中
  2. import json
  3. import os
  4. import sys
  5. from datetime import datetime
  6. # 添加项目根目录到Python路径
  7. sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  8. from utils.mysql_db import MysqlHelper
  9. from loguru import logger
  10. def get_level1_categories():
  11. """从数据库获取level为1的category名称和ID"""
  12. try:
  13. sql = "SELECT id, name FROM knowledge_content_category WHERE level = 1"
  14. result = MysqlHelper.get_values(sql)
  15. if result:
  16. # 提取id和name字段的值
  17. categories = [(row[0], row[1]) for row in result]
  18. logger.info(f"成功获取到 {len(categories)} 个level为1的category")
  19. return categories
  20. else:
  21. logger.warning("未找到level为1的category数据")
  22. return []
  23. except Exception as e:
  24. logger.error(f"获取数据库category数据失败: {e}")
  25. return []
  26. def load_query_json():
  27. """加载query.json文件"""
  28. try:
  29. query_file_path = os.path.join(os.path.dirname(__file__), "query.json")
  30. with open(query_file_path, 'r', encoding='utf-8') as f:
  31. data = json.load(f)
  32. logger.info("成功加载query.json文件")
  33. return data
  34. except Exception as e:
  35. logger.error(f"加载query.json文件失败: {e}")
  36. return None
  37. def generate_combinations(categories, query_data):
  38. """生成category与query类型的组合"""
  39. combinations = []
  40. # 获取query.json中的各种类型
  41. content_formats = [item["name"] for item in query_data.get("content_format", [])]
  42. stages = [item["name"] for item in query_data.get("stage", [])]
  43. content_types = [item["name"] for item in query_data.get("content_type", [])]
  44. # 生成所有可能的组合
  45. for category_id, category_name in categories:
  46. for content_format in content_formats:
  47. for stage in stages:
  48. for content_type in content_types:
  49. # 生成拼接的字符串
  50. query_word = f"{category_name}{content_format}{stage}{content_type}"
  51. # 创建JSON结构
  52. combination = {
  53. "category_id": category_id,
  54. "stage": stage,
  55. "content_type": content_type,
  56. "content_format": content_format,
  57. "query_word": query_word,
  58. "status": 0,
  59. "create_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  60. }
  61. combinations.append(combination)
  62. logger.info(f"生成了 {len(combinations)} 个组合")
  63. return combinations
  64. def save_to_database(combinations):
  65. """将组合结果保存到knowledge_content_query表中"""
  66. try:
  67. # 先清空表(可选,根据需求决定)
  68. # clear_sql = "DELETE FROM knowledge_content_query"
  69. # MysqlHelper.update_values(clear_sql)
  70. # 插入数据的SQL语句
  71. insert_sql = """
  72. INSERT INTO knowledge_content_query
  73. (category_id, stage, content_type, content_format, query_word, status, create_time)
  74. VALUES (%s, %s, %s, %s, %s, %s, %s)
  75. """
  76. success_count = 0
  77. total_count = len(combinations)
  78. logger.info(f"开始插入 {total_count} 条记录到数据库...")
  79. for i, combo in enumerate(combinations, 1):
  80. params = (
  81. combo["category_id"],
  82. combo["stage"],
  83. combo["content_type"],
  84. combo["content_format"],
  85. combo["query_word"],
  86. combo["status"],
  87. combo["create_time"]
  88. )
  89. result = MysqlHelper.update_values(insert_sql, params)
  90. if result is not None:
  91. success_count += 1
  92. # 每插入100条记录输出一次进度
  93. if i % 100 == 0:
  94. logger.info(f"已插入 {i}/{total_count} 条记录")
  95. else:
  96. logger.error(f"插入失败: {combo}")
  97. logger.info(f"成功插入 {success_count}/{total_count} 条记录到数据库")
  98. return success_count
  99. except Exception as e:
  100. logger.error(f"保存到数据库失败: {e}")
  101. return 0
  102. def print_combinations(combinations):
  103. """打印所有组合结果"""
  104. try:
  105. print(f"\n=== 生成了 {len(combinations)} 个组合 ===\n")
  106. # 打印前5个组合的JSON结构作为示例
  107. for i, combo in enumerate(combinations[:5], 1):
  108. print(f"{i:3d}. {json.dumps(combo, ensure_ascii=False, indent=2)}")
  109. if len(combinations) > 5:
  110. print(f"... 还有 {len(combinations) - 5} 个组合")
  111. print(f"\n=== 总共 {len(combinations)} 个组合 ===")
  112. return True
  113. except Exception as e:
  114. logger.error(f"打印组合结果失败: {e}")
  115. return False
  116. def main():
  117. """主函数"""
  118. logger.info("开始执行set_querys.py")
  119. # 1. 从数据库获取level为1的category
  120. categories = get_level1_categories()
  121. if not categories:
  122. logger.error("无法获取category数据,程序退出")
  123. return
  124. # 2. 加载query.json文件
  125. query_data = load_query_json()
  126. if not query_data:
  127. logger.error("无法加载query.json文件,程序退出")
  128. return
  129. # 3. 生成组合
  130. combinations = generate_combinations(categories, query_data)
  131. # 4. 打印组合结果示例
  132. if print_combinations(combinations):
  133. logger.info("组合生成成功")
  134. # 5. 保存到数据库
  135. logger.info("开始保存到数据库...")
  136. saved_count = save_to_database(combinations)
  137. if saved_count > 0:
  138. logger.info(f"程序执行成功完成,共保存 {saved_count} 条记录到数据库")
  139. else:
  140. logger.error("数据库保存失败")
  141. else:
  142. logger.error("程序执行失败")
  143. if __name__ == "__main__":
  144. main()