search_history.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. """
Search history record management.
  3. """
  4. from typing import List, Dict, Any, Optional
  5. from datetime import datetime
  6. import json
  7. from pathlib import Path
  8. class SearchHistoryManager:
  9. """搜索历史记录管理器"""
  10. def __init__(self, storage_path: str):
  11. self.storage_path = Path(storage_path)
  12. self.storage_path.mkdir(parents=True, exist_ok=True)
  13. self.history_file = self.storage_path / "search_history.jsonl"
  14. async def save_search(
  15. self,
  16. keywords: List[str],
  17. platforms: List[str],
  18. filters: Dict[str, Any],
  19. results_count: int,
  20. trace_id: str,
  21. ) -> None:
  22. """保存搜索记录"""
  23. record = {
  24. "timestamp": datetime.now().isoformat(),
  25. "keywords": keywords,
  26. "platforms": platforms,
  27. "filters": filters,
  28. "results_count": results_count,
  29. "trace_id": trace_id,
  30. }
  31. with open(self.history_file, "a", encoding="utf-8") as f:
  32. f.write(json.dumps(record, ensure_ascii=False) + "\n")
  33. async def get_recent_searches(
  34. self,
  35. limit: int = 50,
  36. keyword_filter: Optional[str] = None,
  37. ) -> List[Dict[str, Any]]:
  38. """获取最近的搜索记录"""
  39. if not self.history_file.exists():
  40. return []
  41. records = []
  42. with open(self.history_file, "r", encoding="utf-8") as f:
  43. for line in f:
  44. if line.strip():
  45. record = json.loads(line)
  46. if keyword_filter:
  47. if keyword_filter in record.get("keywords", []):
  48. records.append(record)
  49. else:
  50. records.append(record)
  51. # 返回最近的记录
  52. return records[-limit:]
  53. async def get_similar_searches(
  54. self,
  55. keywords: List[str],
  56. limit: int = 10,
  57. ) -> List[Dict[str, Any]]:
  58. """获取相似的搜索记录"""
  59. all_searches = await self.get_recent_searches(limit=1000)
  60. # 计算相似度
  61. scored_searches = []
  62. for search in all_searches:
  63. search_keywords = set(search.get("keywords", []))
  64. input_keywords = set(keywords)
  65. similarity = len(search_keywords & input_keywords) / len(search_keywords | input_keywords)
  66. if similarity > 0:
  67. scored_searches.append((similarity, search))
  68. # 按相似度排序
  69. scored_searches.sort(key=lambda x: x[0], reverse=True)
  70. return [search for _, search in scored_searches[:limit]]