1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
class CrawlerDetailAnalysisConst:
    """Constants shared by the crawler detail analysis classes."""

    # Category labels (stored in Chinese; the strings are runtime data and
    # must stay byte-identical). Kept as a list so existing callers that
    # rely on list semantics keep working.
    CATEGORY_LIST = [
        "知识科普",
        "国家大事",
        "历史人物",
        "奇闻趣事",
        "名人八卦",
        "怀旧时光",
        "情感故事",
        "社会法治",
        "现代人物",
        "社会现象",
        "健康养生",
        "家长里短",
        "军事历史",
        "财经科技",
        "政治新闻",
    ]

    # Status value used to select rows from the transform queue.
    # NOTE(review): presumably means "transformation finished" - confirm
    # against the table's status definitions.
    TRANSFORMED_STATUS = 1
class CrawlerDetail(CrawlerDetailAnalysisConst):
    """Base class for crawler detail analysis.

    Adds no behaviour of its own; it only inherits the constants defined on
    ``CrawlerDetailAnalysisConst``.
    """
class CrawlerVideoDetailAnalysis(CrawlerDetail):
    """Aggregate statistics about crawled and transformed videos.

    Each query method builds a parameterised SQL statement, hands it to
    ``self.pool.async_fetch`` and returns whatever that coroutine yields.

    The statements assume the driver behind ``pool`` interpolates ``params``
    client-side with Python ``%`` formatting (PyMySQL / aiomysql style) - the
    tuple bound to ``IN %s`` below already relies on that. Under that scheme
    a literal percent sign inside the SQL text must be written as ``%%``,
    which is why the MySQL date format appears as ``'%%Y-%%m-%%d'``.
    NOTE(review): confirm the driver used by ``pool``.
    """

    def __init__(self, pool, trace_id):
        """Store the collaborators used by the query methods.

        Args:
            pool: Object exposing an awaitable
                ``async_fetch(query=..., params=...)``.
            trace_id: Identifier kept on the instance; none of the methods
                in this class read it.
        """
        self.pool = pool
        self.trace_id = trace_id

    async def get_crawler_videos_by_platform(self, start_date, end_data):
        """Count crawled videos per day and platform within a date range.

        Args:
            start_date: Lower bound, compared against the crawl day rendered
                as ``YYYY-MM-DD`` (presumably a string in that format).
            end_data: Upper bound, inclusive, same format. The name (likely
                meant to be ``end_date``) is kept for caller compatibility.

        Returns:
            The result of ``pool.async_fetch``; each row carries
            ``crawler_date``, ``platform`` and ``video_count``.
        """
        query = """
            SELECT FROM_UNIXTIME(crawler_timestamp, '%%Y-%%m-%%d') AS crawler_date, platform, count(1) AS video_count
            FROM publish_single_video_source
            WHERE FROM_UNIXTIME(crawler_timestamp, '%%Y-%%m-%%d') BETWEEN %s AND %s
            GROUP BY FROM_UNIXTIME(crawler_timestamp, '%%Y-%%m-%%d'), platform;
        """
        return await self.pool.async_fetch(query=query, params=(start_date, end_data))

    async def get_crawler_videos_by_category(self, start_date, end_data):
        """Count crawled videos per day and category within a date range.

        Only categories listed in ``CATEGORY_LIST`` are counted.

        Args:
            start_date: Lower bound, compared against the crawl day rendered
                as ``YYYY-MM-DD`` (presumably a string in that format).
            end_data: Upper bound, inclusive, same format. The name (likely
                meant to be ``end_date``) is kept for caller compatibility.

        Returns:
            The result of ``pool.async_fetch``; each row carries
            ``crawler_date``, ``category`` and ``video_count``.
        """
        # The range filter compares the day-formatted string, exactly like
        # the platform query, so the end day is included in full. Comparing
        # the raw DATETIME against a bare date would cut the range off at
        # midnight of the end day.
        query = """
            SELECT FROM_UNIXTIME(crawler_timestamp, '%%Y-%%m-%%d') AS crawler_date, category, count(1) AS video_count
            FROM publish_single_video_source
            WHERE FROM_UNIXTIME(crawler_timestamp, '%%Y-%%m-%%d') BETWEEN %s AND %s AND category in %s
            GROUP BY FROM_UNIXTIME(crawler_timestamp, '%%Y-%%m-%%d'), category;
        """
        return await self.pool.async_fetch(
            query=query, params=(start_date, end_data, tuple(self.CATEGORY_LIST))
        )

    async def get_transform_videos_by_platform(self, start_date, end_data):
        """Count transformed videos per day and platform, with mean score.

        Only rows whose ``status`` equals ``TRANSFORMED_STATUS`` are counted.

        Args:
            start_date: Lower bound applied to ``create_timestamp``.
            end_data: Upper bound applied to ``create_timestamp``. The name
                (likely meant to be ``end_date``) is kept for caller
                compatibility.

        Returns:
            The result of ``pool.async_fetch``; each row carries ``dt``,
            ``platform``, ``video_count`` and ``average_similarity_score``.
        """
        # NOTE(review): the bounds are compared with the raw column here, not
        # with its day-formatted value. If ``create_timestamp`` is a DATETIME
        # and callers pass bare dates, the end day is effectively excluded -
        # confirm the intended range semantics.
        query = """
            SELECT DATE_FORMAT(create_timestamp, '%%Y-%%m-%%d') AS dt, platform,
                   count(*) AS video_count, avg(score) AS average_similarity_score
            FROM single_video_transform_queue
            WHERE create_timestamp BETWEEN %s AND %s AND status = %s
            GROUP BY DATE_FORMAT(create_timestamp, '%%Y-%%m-%%d'), platform;
        """
        return await self.pool.async_fetch(
            query=query, params=(start_date, end_data, self.TRANSFORMED_STATUS)
        )

    async def get_transform_videos_by_category(self, start_date, end_data):
        """Placeholder for the per-category transform statistics.

        Not implemented yet: no query is issued and ``None`` is returned.
        """
        # TODO: implement the per-category aggregation.
        return None
|