class CrawlerDetailAnalysisConst:
    """Shared constants for crawler-detail analysis classes."""

    # Content categories included in category-level aggregations.
    CATEGORY_LIST = [
        "知识科普",
        "国家大事",
        "历史人物",
        "奇闻趣事",
        "名人八卦",
        "怀旧时光",
        "情感故事",
        "社会法治",
        "现代人物",
        "社会现象",
        "健康养生",
        "家长里短",
        "军事历史",
        "财经科技",
        "政治新闻",
    ]
    # Status value in single_video_transform_queue marking a row as transformed.
    TRANSFORMED_STATUS = 1


class CrawlerDetail(CrawlerDetailAnalysisConst):
    """Base class for crawler-detail analyzers; currently only carries constants."""

    pass


class CrawlerVideoDetailAnalysis(CrawlerDetail):
    """Aggregated statistics over crawled and transformed videos.

    All queries are executed through ``self.pool.async_fetch`` against MySQL.
    NOTE(review): parameters are named ``end_data`` (apparent typo for
    ``end_date``); kept as-is so existing keyword callers do not break.
    """

    def __init__(self, pool, trace_id):
        # pool: async DB pool exposing async_fetch(query=..., params=...)
        # trace_id: request-scoped identifier, kept for logging/tracing
        self.pool = pool
        self.trace_id = trace_id

    async def get_crawler_videos_by_platform(self, start_date, end_data):
        """Count crawled videos per (day, platform) between start_date and end_data.

        Both bounds are inclusive 'YYYY-MM-DD' strings.
        Returns rows of (crawler_date, platform, video_count).
        """
        query = """
            SELECT FROM_UNIXTIME(crawler_timestamp, '%Y-%m-%d') AS crawler_date,
                   platform,
                   count(1) AS video_count
            FROM publish_single_video_source
            WHERE FROM_UNIXTIME(crawler_timestamp, '%Y-%m-%d') BETWEEN %s AND %s
            GROUP BY FROM_UNIXTIME(crawler_timestamp, '%Y-%m-%d'), platform;
        """
        return await self.pool.async_fetch(query=query, params=(start_date, end_data))

    async def get_crawler_videos_by_category(self, start_date, end_data):
        """Count crawled videos per (day, category) between start_date and end_data.

        Only categories in CATEGORY_LIST are included.
        Returns rows of (crawler_date, category, video_count).
        """
        # FIX: the WHERE clause previously used FROM_UNIXTIME(crawler_timestamp)
        # without the '%Y-%m-%d' format, comparing a full datetime string against
        # bare date bounds — which silently excluded most rows on the end date.
        # Now consistent with SELECT/GROUP BY and the platform variant above.
        query = """
            SELECT FROM_UNIXTIME(crawler_timestamp, '%Y-%m-%d') AS crawler_date,
                   category,
                   count(1) AS video_count
            FROM publish_single_video_source
            WHERE FROM_UNIXTIME(crawler_timestamp, '%Y-%m-%d') BETWEEN %s AND %s
              AND category in %s
            GROUP BY FROM_UNIXTIME(crawler_timestamp, '%Y-%m-%d'), category;
        """
        return await self.pool.async_fetch(
            query=query, params=(start_date, end_data, tuple(self.CATEGORY_LIST))
        )

    async def get_transform_videos_by_platform(self, start_date, end_data):
        """Count transformed videos per (day, platform) between start_date and end_data.

        Only rows with status == TRANSFORMED_STATUS are counted.
        Returns rows of (dt, platform, video_count, average_similarity_score).
        """
        query = """
            SELECT DATE_FORMAT(create_timestamp, '%Y-%m-%d') AS dt,
                   platform,
                   count(*) AS video_count,
                   avg(score) AS average_similarity_score
            FROM single_video_transform_queue
            WHERE create_timestamp BETWEEN %s AND %s
              AND status = %s
            GROUP BY DATE_FORMAT(create_timestamp, '%Y-%m-%d'), platform;
        """
        return await self.pool.async_fetch(
            query=query, params=(start_date, end_data, self.TRANSFORMED_STATUS)
        )

    async def get_transform_videos_by_category(self, start_date, end_data):
        """Count transformed videos per (day, category) between start_date and end_data.

        Implemented to mirror get_transform_videos_by_platform, restricted to
        CATEGORY_LIST like the crawler category query (previously an empty stub).
        Returns rows of (dt, category, video_count, average_similarity_score).

        NOTE(review): assumes single_video_transform_queue has a `category`
        column analogous to publish_single_video_source — confirm against schema.
        """
        query = """
            SELECT DATE_FORMAT(create_timestamp, '%Y-%m-%d') AS dt,
                   category,
                   count(*) AS video_count,
                   avg(score) AS average_similarity_score
            FROM single_video_transform_queue
            WHERE create_timestamp BETWEEN %s AND %s
              AND status = %s
              AND category in %s
            GROUP BY DATE_FORMAT(create_timestamp, '%Y-%m-%d'), category;
        """
        return await self.pool.async_fetch(
            query=query,
            params=(
                start_date,
                end_data,
                self.TRANSFORMED_STATUS,
                tuple(self.CATEGORY_LIST),
            ),
        )