@@ -1,4 +1,3 @@
-from datetime import datetime
 
 from applications.api import feishu_robot
 
@@ -80,9 +79,7 @@ class CrawlerVideoDetailAnalysis(CrawlerDetailBase):
         获取 start_dt && end_dt 之间每个渠道抓取的视频数量
         """
         query = """
-            SELECT CAST(
-                DATE(FROM_UNIXTIME(crawler_timestamp)) AS CHAR
-            ) AS dt, platform, count(1) AS video_count
+            SELECT CAST(DATE(FROM_UNIXTIME(crawler_timestamp)) AS CHAR) AS dt, platform, count(1) AS video_count
             FROM publish_single_video_source
             WHERE crawler_timestamp BETWEEN UNIX_TIMESTAMP(%s) AND UNIX_TIMESTAMP(%s)
             GROUP BY dt, platform;
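A note on this hunk (whose docstring reads "get the number of videos crawled per channel between start_dt and end_dt"): the CAST is presumably there because `DATE(FROM_UNIXTIME(...))` reaches Python as a `datetime.date`, which the standard `json` encoder rejects if the rows are forwarded as-is into the Feishu card payload. A minimal sketch of that failure mode, with an invented row:

    import json
    from datetime import date

    # Hypothetical row, shaped like this query's output without the CAST:
    row = {"dt": date(2025, 1, 1), "platform": "hksp", "video_count": 42}
    try:
        json.dumps(row)  # TypeError: Object of type date is not JSON serializable
    except TypeError:
        row["dt"] = str(row["dt"])  # what CAST(... AS CHAR) does on the SQL side
    print(json.dumps(row))  # {"dt": "2025-01-01", "platform": "hksp", "video_count": 42}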
@@ -95,11 +92,11 @@ class CrawlerVideoDetailAnalysis(CrawlerDetailBase):
         """
         category_place_holders = ", ".join(["%s"] * len(self.CATEGORY_LIST))
         query = f"""
-            SELECT DATE(FROM_UNIXTIME(crawler_timestamp)) AS dt, category, count(1) AS video_count
+            SELECT CAST(DATE(FROM_UNIXTIME(crawler_timestamp)) AS CHAR) AS dt, category, count(1) AS video_count
             FROM publish_single_video_source
-            WHERE FROM_UNIXTIME(crawler_timestamp) BETWEEN %s AND %s
+            WHERE crawler_timestamp BETWEEN UNIX_TIMESTAMP(%s) AND UNIX_TIMESTAMP(%s)
             AND category IN ({category_place_holders})
-            GROUP BY DATE(FROM_UNIXTIME(crawler_timestamp)), category;
+            GROUP BY dt, category;
         """
         return await self.pool.async_fetch(
             query=query, params=tuple([start_date, end_date] + self.CATEGORY_LIST)
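The WHERE rewrite in this hunk is the substantive change, not just style. `FROM_UNIXTIME(crawler_timestamp)` applies a function to the column, so MySQL must evaluate it per row and cannot use an index range scan; converting the bound parameters instead keeps the predicate sargable, and also brings this query in line with the per-platform one above. A sketch of the two shapes (whether `crawler_timestamp` actually carries an index is an assumption):

    # Old shape: the function wraps the column, runs once per row,
    # and hides crawler_timestamp from any index range scan.
    slow_where = "WHERE FROM_UNIXTIME(crawler_timestamp) BETWEEN %s AND %s"

    # New shape: UNIX_TIMESTAMP runs once per bound value; the bare
    # column stays sargable, so an index on it (if present) is usable.
    fast_where = "WHERE crawler_timestamp BETWEEN UNIX_TIMESTAMP(%s) AND UNIX_TIMESTAMP(%s)"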
@@ -107,11 +104,11 @@ class CrawlerVideoDetailAnalysis(CrawlerDetailBase):
 
     async def get_transform_videos_by_platform(self, start_date, end_date):
         query = """
-            SELECT DATE(create_timestamp) AS dt, platform,
+            SELECT CAST(DATE(create_timestamp) AS CHAR) AS dt, platform,
                 count(*) AS video_count, avg(score) AS average_similarity_score
             FROM single_video_transform_queue
             WHERE create_timestamp BETWEEN %s AND %s AND status = %s
-            GROUP BY DATE(create_timestamp), platform;
+            GROUP BY dt, platform;
         """
         return await self.pool.async_fetch(
             query=query, params=(start_date, end_date, self.TRANSFORMED_STATUS)
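With the matching CAST here, all of the detail queries now return `dt` as a plain string, so their result sets share one shape and can feed the common notification path added in the next hunk. A hypothetical row (values invented for illustration):

    # Hypothetical row from get_transform_videos_by_platform after this hunk:
    row = {
        "dt": "2025-01-01",                # CHAR via CAST, not a datetime.date
        "platform": "sph",
        "video_count": 17,
        "average_similarity_score": 0.83,  # avg(score) over transformed rows
    }

Note that `GROUP BY dt` now groups by the SELECT alias, which MySQL permits; the grouping itself is unchanged.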
@@ -155,22 +152,22 @@ class CrawlerDetailDeal(CrawlerVideoDetailAnalysis, CrawlerArticleDetailAnalysis
 
         match media_type:
             case "video":
-                response = await self.analysis_video_pool(sub_task, start_date, end_date)
-                column_list = list(response[0].keys())
-                columns = [column_dict[key] for key in column_list]
-                print(columns)
-                await feishu_robot.bot(
-                    title=f"[{start_date}, {end_date}) 抓取视频分平台统计",
-                    detail={
-                        "columns": columns,
-                        "rows": response,
-                    },
-                    table=True,
-                    mention=False,
-                )
-                print("bot 成功")
+                crawler_detail = await self.analysis_video_pool(sub_task, start_date, end_date)
 
             case "article":
-                resource = await self.analysis_article_pool(sub_task, start_date, end_date)
+                crawler_detail = await self.analysis_article_pool(sub_task, start_date, end_date)
             case _:
-                pass
+                return None
+
+        column_list = list(crawler_detail[0].keys())
+        columns = [column_dict[key] for key in column_list]
+        await feishu_robot.bot(
+            title=f"[{start_date}, {end_date}) 抓取 {media_type} 统计",
+            detail={
+                "columns": columns,
+                "rows": crawler_detail,
+            },
+            table=True,
+            mention=False,
+        )
+        return None
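This last hunk hoists the Feishu report out of the `case "video"` arm so both media types share it, drops the debug `print` calls, and generalizes the title to `抓取 {media_type} 统计` ("crawled {media_type} stats"). The `case _: return None` matters for correctness: with the old bare `pass`, an unknown `media_type` would fall through with `crawler_detail` unbound, and the hoisted block would raise `NameError`. One sharp edge remains: `crawler_detail[0]` raises `IndexError` when the window returns no rows. A defensive variant, as a sketch (not part of this diff):

    # Sketch: guard the shared reporting path against an empty result set.
    if not crawler_detail:
        return None  # nothing in [start_date, end_date); skip the report
    column_list = list(crawler_detail[0].keys())  # safe: non-empty from here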