
add crawler_detail.py

luojunhui 6 days ago
commit 2345b46ac6
1 changed file with 22 additions and 25 deletions

+ 22 - 25
applications/tasks/analysis_task/crawler_detail.py

@@ -1,4 +1,3 @@
-from datetime import datetime
 
 from applications.api import feishu_robot
 
@@ -80,9 +79,7 @@ class CrawlerVideoDetailAnalysis(CrawlerDetailBase):
         Get the number of videos crawled by each channel between start_dt and end_dt
         """
         query = """
-            SELECT CAST(
-                DATE(FROM_UNIXTIME(crawler_timestamp)) AS CHAR
-                ) AS dt, platform, count(1) AS video_count
+            SELECT CAST(DATE(FROM_UNIXTIME(crawler_timestamp)) AS CHAR) AS dt, platform, count(1) AS video_count
             FROM publish_single_video_source
             WHERE crawler_timestamp BETWEEN UNIX_TIMESTAMP(%s) AND UNIX_TIMESTAMP(%s)
             GROUP BY dt, platform;
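
The rewritten query keeps the BETWEEN filter on the raw crawler_timestamp column and only converts the bound dates with UNIX_TIMESTAMP(%s), so an index on the column remains usable, and CAST(... AS CHAR) returns the grouped dt as a plain string. A minimal sketch of how this query could sit in a fetch method, assuming the pool.async_fetch helper used later in this file returns a list of dicts; the method name and the example row are illustrative, not part of the commit:

# Sketch only: the method name and the row shape are assumptions for illustration.
async def get_crawl_videos_by_platform(self, start_date, end_date):
    query = """
        SELECT CAST(DATE(FROM_UNIXTIME(crawler_timestamp)) AS CHAR) AS dt,
               platform, count(1) AS video_count
        FROM publish_single_video_source
        WHERE crawler_timestamp BETWEEN UNIX_TIMESTAMP(%s) AND UNIX_TIMESTAMP(%s)
        GROUP BY dt, platform;
    """
    # Dates arrive as strings such as "2025-01-01"; MySQL converts them to epoch
    # seconds, so the timestamp column itself is never wrapped in a function.
    return await self.pool.async_fetch(query=query, params=(start_date, end_date))

# Illustrative row: {"dt": "2025-01-01", "platform": "toutiao", "video_count": 42}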
@@ -95,11 +92,11 @@ class CrawlerVideoDetailAnalysis(CrawlerDetailBase):
         """
         category_place_holders = ", ".join(["%s"] * len(self.CATEGORY_LIST))
         query = f"""
-            SELECT DATE(FROM_UNIXTIME(crawler_timestamp)) AS dt, category, count(1) AS video_count
+            SELECT CAST(DATE(FROM_UNIXTIME(crawler_timestamp)) AS CHAR) AS dt, category, count(1) AS video_count
             FROM publish_single_video_source
-            WHERE FROM_UNIXTIME(crawler_timestamp) BETWEEN %s AND %s 
+            WHERE crawler_timestamp BETWEEN UNIX_TIMESTAMP(%s) AND UNIX_TIMESTAMP(%s)
               AND category IN ({category_place_holders})
-            GROUP BY DATE(FROM_UNIXTIME(crawler_timestamp)), category;
+            GROUP BY dt, category;
         """
         return await self.pool.async_fetch(
             query=query, params=tuple([start_date, end_date] + self.CATEGORY_LIST)
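
For the per-category query, category_place_holders expands to one %s per entry in self.CATEGORY_LIST, and the params tuple lists the two dates first so the values line up with the placeholders in the SQL text. A small standalone sketch of that expansion, using made-up category values since CATEGORY_LIST is not shown in this diff:

# Hypothetical category values; the real CATEGORY_LIST is defined elsewhere.
CATEGORY_LIST = ["health", "history", "culture"]

category_place_holders = ", ".join(["%s"] * len(CATEGORY_LIST))
# -> "%s, %s, %s", so the clause reads: AND category IN (%s, %s, %s)

params = tuple(["2025-01-01", "2025-01-07"] + CATEGORY_LIST)
# -> ("2025-01-01", "2025-01-07", "health", "history", "culture")
# The two date placeholders precede the IN placeholders, matching their
# order of appearance in the query string.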
@@ -107,11 +104,11 @@ class CrawlerVideoDetailAnalysis(CrawlerDetailBase):
 
     async def get_transform_videos_by_platform(self, start_date, end_date):
         query = """
-            SELECT DATE(create_timestamp) AS dt, platform, 
+            SELECT CAST(DATE(create_timestamp) AS CHAR) AS dt, platform, 
                    count(*) AS video_count, avg(score) AS average_similarity_score
             FROM single_video_transform_queue
             WHERE create_timestamp BETWEEN %s AND %s AND status = %s
-            GROUP BY DATE(create_timestamp), platform;
+            GROUP BY dt, platform;
         """
         return await self.pool.async_fetch(
             query=query, params=(start_date, end_date, self.TRANSFORMED_STATUS)
@@ -155,22 +152,22 @@ class CrawlerDetailDeal(CrawlerVideoDetailAnalysis, CrawlerArticleDetailAnalysis
 
         match media_type:
             case "video":
-                response = await self.analysis_video_pool(sub_task, start_date, end_date)
-                column_list = list(response[0].keys())
-                columns = [column_dict[key] for key in column_list]
-                print(columns)
-                await feishu_robot.bot(
-                    title=f"[{start_date}, {end_date}) 抓取视频分平台统计",
-                    detail={
-                        "columns": columns,
-                        "rows": response,
-                    },
-                    table=True,
-                    mention=False,
-                )
-                print("bot 成功")
+                crawler_detail = await self.analysis_video_pool(sub_task, start_date, end_date)
 
             case "article":
-                resource = await self.analysis_article_pool(sub_task, start_date, end_date)
+                crawler_detail = await self.analysis_article_pool(sub_task, start_date, end_date)
             case _:
-                pass
+                return None
+
+        column_list = list(crawler_detail[0].keys())
+        columns = [column_dict[key] for key in column_list]
+        await feishu_robot.bot(
+            title=f"[{start_date}, {end_date}) 抓取 {media_type} 统计",
+            detail={
+                "columns": columns,
+                "rows": crawler_detail,
+            },
+            table=True,
+            mention=False,
+        )
+        return None
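
With the formatting block hoisted out of the match, both media types share one Feishu push. The new code still indexes crawler_detail[0], which raises IndexError when nothing was crawled in the window; a minimal sketch of the same flow with an empty-result guard added (the guard is an illustration, not part of this commit):

# Sketch of the consolidated flow; only the empty-result guard is new here.
match media_type:
    case "video":
        crawler_detail = await self.analysis_video_pool(sub_task, start_date, end_date)
    case "article":
        crawler_detail = await self.analysis_article_pool(sub_task, start_date, end_date)
    case _:
        return None

if not crawler_detail:
    # No rows for this window: skip the push instead of failing on crawler_detail[0].
    return None

columns = [column_dict[key] for key in crawler_detail[0].keys()]
await feishu_robot.bot(
    title=f"[{start_date}, {end_date}) 抓取 {media_type} 统计",
    detail={"columns": columns, "rows": crawler_detail},
    table=True,
    mention=False,
)
return None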