# tools_v1.py
import json

from core.database import DBHelper
from data_models.content_data import ContentData
from data_models.keyword_clustering import KeywordClustering
from data_models.keyword_data import KeywordData
from data_models.keyword_with_content import KeywordWithContent
  7. def query_keyword_data(keywords, db_helper):
  8. """获取关键词数据,避免重复查询"""
  9. if not keywords:
  10. return {}
  11. # 一次性查询所有关键词数据
  12. keyword_datas = db_helper.get_all(KeywordData, keyword__in=keywords)
  13. return {data.keyword: data for data in keyword_datas}
  14. def query_keyword_summary_results(keywords):
  15. """通过关键词搜索,获取与问题相关的内容总结
  16. Args:
  17. keywords: 关键词列表["关键词1","关键词2",...]
  18. Returns:
  19. list: 搜索结果,包含关键词和对应的总结
  20. """
  21. if not keywords:
  22. return []
  23. res = []
  24. db_helper = DBHelper()
  25. try:
  26. # 一次性获取所有关键词数据
  27. keyword_dict = query_keyword_data(keywords, db_helper)
  28. # 获取所有关键词ID
  29. keyword_ids = [data.id for data in keyword_dict.values()]
  30. if not keyword_ids:
  31. return res
  32. # 一次性查询所有关键词聚类数据
  33. clustering_data = db_helper.get_all(
  34. KeywordClustering,
  35. keyword_id__in=keyword_ids
  36. )
  37. # 构建关键字ID到聚类数据的映射
  38. clustering_map = {data.keyword_id: data for data in clustering_data}
  39. # 构建结果
  40. for keyword in keywords:
  41. if keyword in keyword_dict:
  42. keyword_id = keyword_dict[keyword].id
  43. if keyword_id in clustering_map:
  44. res.append({
  45. 'keyword': keyword,
  46. 'keyword_summary': clustering_map[keyword_id].keyword_summary
  47. })
  48. except Exception as e:
  49. # 记录日志或处理异常
  50. print(f"查询关键词总结时出错: {str(e)}")
  51. return res
  52. def query_keyword_content_results(keywords):
  53. """通过关键词搜索,获取与问题相关的内容
  54. Args:
  55. keywords: 关键词列表["关键词1","关键词2",...]
  56. Returns:
  57. list: 搜索结果,包含关键词、内容和内容总结
  58. """
  59. if not keywords:
  60. return []
  61. res = []
  62. db_helper = DBHelper()
  63. try:
  64. # 一次性获取所有关键词数据
  65. keyword_dict = query_keyword_data(keywords, db_helper)
  66. # 获取所有关键词ID
  67. keyword_ids = [data.id for data in keyword_dict.values()]
  68. if not keyword_ids:
  69. return res
  70. # 一次性查询所有关键词与内容的关联
  71. keyword_content_relations = db_helper.get_all(
  72. KeywordWithContent,
  73. keyword_id__in=keyword_ids
  74. )
  75. # 获取所有内容ID
  76. content_ids = [relation.content_id for relation in keyword_content_relations]
  77. if not content_ids:
  78. return res
  79. # 一次性查询所有内容数据
  80. content_data_list = db_helper.get_all(
  81. ContentData,
  82. id__in=content_ids
  83. )
  84. # 构建内容ID到内容数据的映射
  85. content_map = {data.id: data for data in content_data_list}
  86. # 构建关键字ID到关键词的映射
  87. keyword_id_to_word = {data.id: data.keyword for data in keyword_dict.values()}
  88. # 构建结果
  89. for relation in keyword_content_relations:
  90. if relation.content_id in content_map:
  91. content_data = content_map[relation.content_id]
  92. res.append({
  93. 'keyword': keyword_id_to_word.get(relation.keyword_id, '未知关键词'),
  94. 'content': content_data.content,
  95. 'content_summary': content_data.summary
  96. })
  97. except Exception as e:
  98. # 记录日志或处理异常
  99. print(f"查询关键词内容时出错: {str(e)}")
  100. return res
  101. if __name__ == '__main__':
  102. print(json.dumps(query_keyword_content_results(['医疗AI', 'Lora模型']), ensure_ascii=False))
#
# def query_embedding_results(query, top_k=5, better_than_threshold=0.65):
#     graphvectorizer = GraphVectorizer()
#     return graphvectorizer.embedding_search_entity(query, top_k=top_k, better_than_threshold=better_than_threshold)