import json

from core.database import DBHelper
from data_models.content_data import ContentData
from data_models.keyword_clustering import KeywordClustering
from data_models.keyword_data import KeywordData
from data_models.keyword_with_content import KeywordWithContent


def query_keyword_data(keywords, db_helper):
    """Fetch the KeywordData rows for *keywords* with a single query.

    Args:
        keywords: List of keyword strings, e.g. ["kw1", "kw2", ...].
        db_helper: Object exposing ``get_all(model, **filters)``.

    Returns:
        dict: Maps each found row's ``keyword`` attribute to the row itself.
        Empty when *keywords* is empty (no query is issued in that case).
    """
    if not keywords:
        return {}

    # One batched lookup instead of one query per keyword.
    keyword_rows = db_helper.get_all(KeywordData, keyword__in=keywords)
    return {row.keyword: row for row in keyword_rows}


def query_keyword_summary_results(keywords):
    """Look up the clustering summary stored for each keyword.

    Args:
        keywords: List of keyword strings, e.g. ["kw1", "kw2", ...].

    Returns:
        list: One ``{'keyword': ..., 'keyword_summary': ...}`` dict for every
        entry of *keywords* (in input order) that has both a KeywordData row
        and a KeywordClustering row. If any exception is raised while
        querying, it is printed and the results collected so far are
        returned.
    """
    if not keywords:
        return []

    res = []
    db_helper = DBHelper()

    try:
        # Resolve all keywords to their rows in one query.
        keyword_dict = query_keyword_data(keywords, db_helper)

        keyword_ids = [row.id for row in keyword_dict.values()]
        if not keyword_ids:
            return res

        # Fetch the clustering rows for every matched keyword in one query.
        clustering_rows = db_helper.get_all(
            KeywordClustering,
            keyword_id__in=keyword_ids,
        )

        # keyword_id -> clustering row (a later row overwrites an earlier one
        # that has the same keyword_id).
        clustering_map = {row.keyword_id: row for row in clustering_rows}

        # Walk the input list so the output follows the caller's order.
        for keyword in keywords:
            keyword_row = keyword_dict.get(keyword)
            if keyword_row is None:
                continue
            clustering_row = clustering_map.get(keyword_row.id)
            if clustering_row is None:
                continue
            res.append({
                'keyword': keyword,
                'keyword_summary': clustering_row.keyword_summary,
            })
    except Exception as e:
        # Best-effort: report the failure and fall through to return
        # whatever was collected.
        print(f"查询关键词总结时出错: {e}")

    return res


def query_keyword_content_results(keywords):
    """Look up the content entries associated with each keyword.

    Args:
        keywords: List of keyword strings, e.g. ["kw1", "kw2", ...].

    Returns:
        list: One ``{'keyword': ..., 'content': ..., 'content_summary': ...}``
        dict per keyword/content relation whose content row was found. If any
        exception is raised while querying, it is printed and the results
        collected so far are returned.
    """
    if not keywords:
        return []

    res = []
    db_helper = DBHelper()

    try:
        # Resolve all keywords to their rows in one query.
        keyword_dict = query_keyword_data(keywords, db_helper)

        keyword_ids = [row.id for row in keyword_dict.values()]
        if not keyword_ids:
            return res

        # Fetch every keyword <-> content relation in one query.
        keyword_content_relations = db_helper.get_all(
            KeywordWithContent,
            keyword_id__in=keyword_ids,
        )

        # Several relations may point at the same content row; drop repeated
        # ids (keeping first-seen order) so the lookup list is not inflated.
        content_ids = list(dict.fromkeys(
            relation.content_id for relation in keyword_content_relations
        ))
        if not content_ids:
            return res

        # Fetch all referenced content rows in one query.
        content_rows = db_helper.get_all(
            ContentData,
            id__in=content_ids,
        )

        # content id -> content row
        content_map = {row.id: row for row in content_rows}

        # keyword id -> keyword text
        keyword_id_to_word = {
            row.id: row.keyword for row in keyword_dict.values()
        }

        # Emit one result per relation, skipping relations whose content row
        # was not returned.
        for relation in keyword_content_relations:
            content_data = content_map.get(relation.content_id)
            if content_data is None:
                continue
            res.append({
                'keyword': keyword_id_to_word.get(relation.keyword_id, '未知关键词'),
                'content': content_data.content,
                'content_summary': content_data.summary,
            })
    except Exception as e:
        # Best-effort: report the failure and fall through to return
        # whatever was collected.
        print(f"查询关键词内容时出错: {e}")

    return res


if __name__ == '__main__':
    print(json.dumps(query_keyword_content_results(['医疗AI', 'Lora模型']), ensure_ascii=False))

# NOTE(review): disabled embedding-search helper kept for reference;
# GraphVectorizer is not imported in the visible part of this file.
#
# def query_embedding_results(query, top_k=5, better_than_threshold=0.65):
#     graphvectorizer = GraphVectorizer()
#     return graphvectorizer.embedding_search_entity(query, top_k=top_k, better_than_threshold=better_than_threshold)