@@ -1,7 +1,6 @@
 from __future__ import annotations

 import asyncio
-import json
 import time
 import traceback
 from datetime import datetime, date, timedelta
@@ -179,7 +178,7 @@ class CrawlerGzhBaseStrategy(CrawlerPipeline, CrawlerGzhConst):
             case _:
                 raise Exception(f"unknown strategy: {strategy}")
         date_string = (datetime.today() - timedelta(days=timedelta_days)).strftime(
-            "%Y-%m-%d"
+            "%Y%m%d"
         )
         return await get_hot_titles(
             self.pool,
@@ -327,6 +326,11 @@ class CrawlerGzhSearchArticles(CrawlerGzhBaseStrategy):
             # update page
             current_page = search_response.get("data", {}).get("next_cursor")

+    async def get_task_execute_result(self):
+        """Get the execution result of this search task."""
+        query = """select count(*) as total_search_articles from crawler_meta_article where trace_id = %s;"""
+        return await self.pool.async_fetch(query=query, params=(self.trace_id,))
+
     async def deal(self, strategy: str = "V1"):
         hot_titles = await self.get_hot_titles_with_strategy(strategy)
         for hot_title in hot_titles:
@@ -335,3 +339,11 @@ class CrawlerGzhSearchArticles(CrawlerGzhBaseStrategy):
                 await self.search_each_title(hot_title)
             except Exception as e:
                 print(f"crawler_gzh_articles error:{e}")
+
+        await feishu_robot.bot(
+            title="GZH search task completed",
+            detail={
+                "strategy": strategy,
+                "execute_detail": await self.get_task_execute_result()
+            }
+        )
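
Note: the only behavioral change in the second hunk is the lookup-date format, which drops the dashes. A minimal sketch of the difference, with an illustrative timedelta_days (the real value is chosen by the strategy match above):

    from datetime import datetime, timedelta

    timedelta_days = 3  # illustrative; the diff derives this from the strategy
    date_string = (datetime.today() - timedelta(days=timedelta_days)).strftime("%Y%m%d")
    print(date_string)  # e.g. "20250101"; the old "%Y-%m-%d" produced "2025-01-01"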
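
And a sketch of how the new completion notice comes together, using a stubbed pool. Only the async_fetch call shape comes from the diff; the stub below and its list-of-dicts return value are assumptions for illustration:

    import asyncio

    class StubPool:
        # Hypothetical stand-in for self.pool; assumes async_fetch returns
        # rows as a list of dicts.
        async def async_fetch(self, query, params):
            return [{"total_search_articles": 42}]  # illustrative count only

    async def main():
        pool, trace_id = StubPool(), "trace-123"  # hypothetical trace_id
        query = """select count(*) as total_search_articles from crawler_meta_article where trace_id = %s;"""
        rows = await pool.async_fetch(query=query, params=(trace_id,))
        print(rows)  # what the diff passes to feishu_robot.bot as "execute_detail"

    asyncio.run(main())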