content_eval.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. """
  2. 内容评估工具 - 评估内容质量和相关性
  3. """
  4. from typing import Dict, Any, List
  5. from agent.tools import tool, ToolResult, ToolContext
  6. @tool(description="评估内容质量和相关性")
  7. async def evaluate_content(
  8. content_items: List[Dict[str, Any]],
  9. keywords: List[str],
  10. ctx: ToolContext = None,
  11. ) -> ToolResult:
  12. """
  13. 评估内容质量和相关性
  14. Args:
  15. content_items: 内容列表
  16. keywords: 搜索关键词
  17. ctx: 工具上下文
  18. """
  19. evaluated_items = []
  20. for item in content_items:
  21. score = _calculate_quality_score(item, keywords)
  22. item["quality_score"] = score
  23. item["relevance_score"] = _calculate_relevance_score(item, keywords)
  24. evaluated_items.append(item)
  25. # 按质量分数排序
  26. evaluated_items.sort(key=lambda x: x["quality_score"], reverse=True)
  27. return ToolResult(
  28. title="内容评估完成",
  29. output=f"已评估 {len(evaluated_items)} 条内容",
  30. data={"evaluated_items": evaluated_items},
  31. )
  32. def _calculate_quality_score(item: Dict[str, Any], keywords: List[str]) -> float:
  33. """
  34. 计算内容质量分数
  35. 考虑因素:
  36. 1. 播放量、点赞数、评论数、分享数
  37. 2. 互动率(点赞/播放、评论/播放)
  38. 3. 发布时间(新鲜度)
  39. """
  40. stats = item.get("stats", {})
  41. views = stats.get("views", 0)
  42. likes = stats.get("likes", 0)
  43. comments = stats.get("comments", 0)
  44. shares = stats.get("shares", 0)
  45. # 基础分数:基于绝对数值
  46. base_score = (
  47. (views / 10000) * 0.3 +
  48. (likes / 1000) * 0.3 +
  49. (comments / 100) * 0.2 +
  50. (shares / 100) * 0.2
  51. )
  52. # 互动率加成
  53. engagement_rate = (likes + comments + shares) / max(views, 1)
  54. engagement_bonus = engagement_rate * 20
  55. # 总分
  56. total_score = min(base_score + engagement_bonus, 100)
  57. return round(total_score, 2)
  58. def _calculate_relevance_score(item: Dict[str, Any], keywords: List[str]) -> float:
  59. """
  60. 计算内容相关性分数
  61. 考虑因素:
  62. 1. 标题中关键词匹配度
  63. 2. 描述中关键词匹配度
  64. 3. 标签匹配度
  65. """
  66. title = item.get("title", "").lower()
  67. description = item.get("description", "").lower()
  68. tags = [t.lower() for t in item.get("tags", [])]
  69. score = 0.0
  70. keyword_count = len(keywords)
  71. for keyword in keywords:
  72. keyword_lower = keyword.lower()
  73. # 标题匹配(权重最高)
  74. if keyword_lower in title:
  75. score += 40 / keyword_count
  76. # 描述匹配
  77. if keyword_lower in description:
  78. score += 30 / keyword_count
  79. # 标签匹配
  80. if keyword_lower in tags:
  81. score += 30 / keyword_count
  82. return min(score, 100)