# crawler_detail.py (2.6 KB)
  1. class CrawlerDetailAnalysisConst:
  2. CATEGORY_LIST = [
  3. "知识科普",
  4. "国家大事",
  5. "历史人物",
  6. "奇闻趣事",
  7. "名人八卦",
  8. "怀旧时光",
  9. "情感故事",
  10. "社会法治",
  11. "现代人物",
  12. "社会现象",
  13. "健康养生",
  14. "家长里短",
  15. "军事历史",
  16. "财经科技",
  17. "政治新闻",
  18. ]
  19. TRANSFORMED_STATUS = 1
  20. class CrawlerDetail(CrawlerDetailAnalysisConst):
  21. pass
  22. class CrawlerVideoDetailAnalysis(CrawlerDetail):
  23. def __init__(self, pool, trace_id):
  24. self.pool = pool
  25. self.trace_id = trace_id
  26. async def get_crawler_videos_by_platform(self, start_date, end_data):
  27. """
  28. 获取 start_dt && end_dt 之间每个渠道抓取的视频数量
  29. """
  30. query = """
  31. SELECT FROM_UNIXTIME(crawler_timestamp, '%Y-%m-%d') AS crawler_date, platform, count(1) AS video_count
  32. FROM publish_single_video_source
  33. WHERE FROM_UNIXTIME(crawler_timestamp, '%Y-%m-%d') BETWEEN %s AND %s
  34. GROUP BY FROM_UNIXTIME(crawler_timestamp, '%Y-%m-%d'), platform;
  35. """
  36. return await self.pool.async_fetch(query=query, params=(start_date, end_data))
  37. async def get_crawler_videos_by_category(self, start_date, end_data):
  38. """
  39. 获取 start_dt && end_dt 之间每个品类抓取的视频数量
  40. """
  41. query = """
  42. SELECT FROM_UNIXTIME(crawler_timestamp, '%Y-%m-%d') AS crawler_date, category, count(1) AS video_count
  43. FROM publish_single_video_source
  44. WHERE FROM_UNIXTIME(crawler_timestamp) BETWEEN %s AND %s AND category in %s
  45. GROUP BY FROM_UNIXTIME(crawler_timestamp, '%Y-%m-%d'), category;
  46. """
  47. return await self.pool.async_fetch(
  48. query=query, params=(start_date, end_data, tuple(self.CATEGORY_LIST))
  49. )
  50. async def get_transform_videos_by_platform(self, start_date, end_data):
  51. query = """
  52. SELECT DATE_FORMAT(create_timestamp, '%Y-%m-%d') AS dt, platform,
  53. count(*) AS video_count, avg(score) AS average_similarity_score
  54. FROM single_video_transform_queue
  55. WHERE create_timestamp BETWEEN %s AND %s AND status = %s
  56. GROUP BY DATE_FORMAT(create_timestamp, '%Y-%m-%d'), platform;
  57. """
  58. return await self.pool.async_fetch(
  59. query=query, params=(start_date, end_data, self.TRANSFORMED_STATUS)
  60. )
  61. async def get_transform_videos_by_category(self, start_date, end_data):
  62. pass