123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144 |
- import json
- from core.database import DBHelper
- from data_models.content_data import ContentData
- from data_models.keyword_clustering import KeywordClustering
- from data_models.keyword_data import KeywordData
- from data_models.keyword_with_content import KeywordWithContent
def query_keyword_data(keywords, db_helper):
    """Fetch keyword records in one batch and map each keyword string to its record.

    Args:
        keywords: list of keyword strings to look up.
        db_helper: database helper exposing ``get_all(Model, **filters)``.

    Returns:
        dict: keyword string -> KeywordData record; empty dict for empty input.
    """
    if not keywords:
        return {}
    # One batched IN-query instead of a lookup per keyword.
    rows = db_helper.get_all(KeywordData, keyword__in=keywords)
    return {row.keyword: row for row in rows}
def query_keyword_summary_results(keywords):
    """Search by keywords and return the content summaries related to them.

    Args:
        keywords: list of keyword strings, e.g. ["kw1", "kw2", ...].

    Returns:
        list: one dict per matched keyword with keys ``keyword`` and
        ``keyword_summary``; keywords missing from the database or without
        clustering data are silently skipped.
    """
    if not keywords:
        return []
    results = []
    db_helper = DBHelper()
    try:
        # Load every keyword row up front to avoid per-keyword queries.
        keyword_map = query_keyword_data(keywords, db_helper)
        ids = [row.id for row in keyword_map.values()]
        if not ids:
            return results
        # Single batched query for all clustering rows we might need.
        clusters = db_helper.get_all(
            KeywordClustering,
            keyword_id__in=ids,
        )
        cluster_by_keyword_id = {c.keyword_id: c for c in clusters}
        # Emit results in the caller's keyword order.
        for word in keywords:
            record = keyword_map.get(word)
            if record is None:
                continue
            cluster = cluster_by_keyword_id.get(record.id)
            if cluster is None:
                continue
            results.append({
                'keyword': word,
                'keyword_summary': cluster.keyword_summary,
            })
    except Exception as e:
        # Best-effort: report the failure and return whatever was collected.
        print(f"查询关键词总结时出错: {str(e)}")
    return results
def query_keyword_content_results(keywords):
    """Search by keywords and return the related content plus its summary.

    Args:
        keywords: list of keyword strings, e.g. ["kw1", "kw2", ...].

    Returns:
        list: one dict per (keyword, content) relation with keys
        ``keyword``, ``content`` and ``content_summary``. Relations whose
        content row is missing are skipped; an unknown keyword id maps to
        the placeholder '未知关键词'.
    """
    if not keywords:
        return []
    res = []
    db_helper = DBHelper()
    try:
        # Load every keyword row up front to avoid per-keyword queries.
        keyword_dict = query_keyword_data(keywords, db_helper)
        keyword_ids = [data.id for data in keyword_dict.values()]
        if not keyword_ids:
            return res
        # One batched query for all keyword->content relations.
        keyword_content_relations = db_helper.get_all(
            KeywordWithContent,
            keyword_id__in=keyword_ids,
        )
        # FIX: several relations can point at the same content row, which
        # previously produced duplicate ids in the IN clause. Deduplicate
        # while preserving first-seen order (dict.fromkeys keeps order).
        content_ids = list(dict.fromkeys(
            relation.content_id for relation in keyword_content_relations
        ))
        if not content_ids:
            return res
        # One batched query for all referenced content rows.
        content_data_list = db_helper.get_all(
            ContentData,
            id__in=content_ids,
        )
        content_map = {data.id: data for data in content_data_list}
        # Reverse map so each relation can be labelled with its keyword.
        keyword_id_to_word = {data.id: data.keyword for data in keyword_dict.values()}
        for relation in keyword_content_relations:
            content_data = content_map.get(relation.content_id)
            if content_data is None:
                continue
            res.append({
                'keyword': keyword_id_to_word.get(relation.keyword_id, '未知关键词'),
                'content': content_data.content,
                'content_summary': content_data.summary,
            })
    except Exception as e:
        # Best-effort: report the failure and return whatever was collected.
        print(f"查询关键词内容时出错: {str(e)}")
    return res
if __name__ == '__main__':
    # Ad-hoc smoke test: dump content results for two sample keywords.
    output = query_keyword_content_results(['医疗AI', 'Lora模型'])
    print(json.dumps(output, ensure_ascii=False))
- #
- # def query_embedding_results(query, top_k=5, better_than_threshold=0.65):
- # graphvectorizer = GraphVectorizer()
- # return graphvectorizer.embedding_search_entity(query, top_k=top_k, better_than_threshold=better_than_threshold)
|