|
@@ -0,0 +1,71 @@
|
|
|
class CrawlerDetailAnalysisConst:
    """Constants shared by the crawler detail analysis classes."""

    # Category labels (stored in Chinese) that the per-category crawl
    # statistics are restricted to. The order is kept as declared.
    CATEGORY_LIST = [
        "知识科普", "国家大事", "历史人物", "奇闻趣事", "名人八卦",
        "怀旧时光", "情感故事", "社会法治", "现代人物", "社会现象",
        "健康养生", "家长里短", "军事历史", "财经科技", "政治新闻",
    ]

    # Value compared against the ``status`` column when selecting rows
    # from the transform queue.
    TRANSFORMED_STATUS = 1
|
|
|
+
|
|
|
+
|
|
|
class CrawlerDetail(CrawlerDetailAnalysisConst):
    """Base class for crawler detail analyses.

    Adds nothing of its own; it only exposes the constants inherited from
    ``CrawlerDetailAnalysisConst`` to its subclasses.
    """
|
|
|
+
|
|
|
+
|
|
|
class CrawlerVideoDetailAnalysis(CrawlerDetail):
    """Daily statistics about crawled and transformed videos.

    Every query is delegated to ``pool.async_fetch(query=..., params=...)``
    and whatever that call returns is handed back to the caller unchanged.
    """

    # Day pattern handed to MySQL's FROM_UNIXTIME / DATE_FORMAT. It is always
    # sent as a bound parameter instead of being written into the SQL text:
    # drivers that interpolate parameters with Python ``%`` formatting
    # (PyMySQL, aiomysql, asyncmy) raise ``ValueError: unsupported format
    # character`` on a literal ``%Y`` in a statement that also has ``%s``
    # placeholders.
    _DAY_FORMAT = "%Y-%m-%d"

    def __init__(self, pool, trace_id):
        """Store the collaborators used by the query methods.

        Args:
            pool: object exposing an awaitable
                ``async_fetch(query=..., params=...)`` method.
            trace_id: identifier kept on the instance; not used by the
                methods of this class.
        """
        self.pool = pool
        self.trace_id = trace_id

    async def get_crawler_videos_by_platform(self, start_date, end_data):
        """Count crawled videos per crawl day and platform.

        Rows of ``publish_single_video_source`` are selected when their crawl
        day, rendered as ``YYYY-MM-DD``, lies between the two bounds.

        Args:
            start_date: lower bound of the crawl day (presumably a
                ``YYYY-MM-DD`` string; confirm against callers).
            end_data: upper bound of the crawl day, same format. The
                misspelled name is kept so keyword callers keep working.

        Returns:
            The result of ``pool.async_fetch`` for a query selecting
            ``crawler_date``, ``platform`` and ``video_count``.
        """
        query = """
            SELECT FROM_UNIXTIME(crawler_timestamp, %s) AS crawler_date, platform, count(1) AS video_count
            FROM publish_single_video_source
            WHERE FROM_UNIXTIME(crawler_timestamp, %s) BETWEEN %s AND %s
            GROUP BY FROM_UNIXTIME(crawler_timestamp, %s), platform;
        """
        day = self._DAY_FORMAT
        return await self.pool.async_fetch(
            query=query, params=(day, day, start_date, end_data, day)
        )

    async def get_crawler_videos_by_category(self, start_date, end_data):
        """Count crawled videos per crawl day and category.

        Only categories listed in ``CATEGORY_LIST`` are counted. The date
        filter compares the day-formatted crawl timestamp, exactly like
        ``get_crawler_videos_by_platform``; comparing the unformatted
        DATETIME against date-only bounds would drop almost all of the
        final day.

        Args:
            start_date: lower bound of the crawl day (presumably a
                ``YYYY-MM-DD`` string; confirm against callers).
            end_data: upper bound of the crawl day, same format. The
                misspelled name is kept so keyword callers keep working.

        Returns:
            The result of ``pool.async_fetch`` for a query selecting
            ``crawler_date``, ``category`` and ``video_count``.
        """
        query = """
            SELECT FROM_UNIXTIME(crawler_timestamp, %s) AS crawler_date, category, count(1) AS video_count
            FROM publish_single_video_source
            WHERE FROM_UNIXTIME(crawler_timestamp, %s) BETWEEN %s AND %s AND category in %s
            GROUP BY FROM_UNIXTIME(crawler_timestamp, %s), category;
        """
        day = self._DAY_FORMAT
        return await self.pool.async_fetch(
            query=query,
            params=(day, day, start_date, end_data, tuple(self.CATEGORY_LIST), day),
        )

    async def get_transform_videos_by_platform(self, start_date, end_data):
        """Count transformed videos per day and platform, with mean score.

        Rows of ``single_video_transform_queue`` whose ``status`` equals
        ``TRANSFORMED_STATUS`` and whose ``create_timestamp`` lies between the
        two bounds are grouped by creation day and platform.

        Args:
            start_date: lower bound compared directly with
                ``create_timestamp``.
            end_data: upper bound compared directly with
                ``create_timestamp``.
                NOTE(review): if the column is a DATETIME and this is a
                date-only value, most of the final day is excluded; confirm
                what callers pass.

        Returns:
            The result of ``pool.async_fetch`` for a query selecting ``dt``,
            ``platform``, ``video_count`` and ``average_similarity_score``.
        """
        query = """
            SELECT DATE_FORMAT(create_timestamp, %s) AS dt, platform,
                   count(*) AS video_count, avg(score) AS average_similarity_score
            FROM single_video_transform_queue
            WHERE create_timestamp BETWEEN %s AND %s AND status = %s
            GROUP BY DATE_FORMAT(create_timestamp, %s), platform;
        """
        day = self._DAY_FORMAT
        return await self.pool.async_fetch(
            query=query,
            params=(day, start_date, end_data, self.TRANSFORMED_STATUS, day),
        )

    async def get_transform_videos_by_category(self, start_date, end_data):
        """Placeholder for the per-category transform statistics.

        Not implemented yet: no query is issued and ``None`` is returned.
        """
        # TODO: implement the per-category counterpart of
        # get_transform_videos_by_platform.
        pass
|