@@ -8,11 +8,12 @@ from datetime import datetime, date
 from typing import List, Dict
 
 from applications.api import feishu_robot
-from applications.crawler.wechat import search
+from applications.crawler.wechat import weixin_search
 from applications.crawler.wechat import get_article_detail
 from applications.crawler.wechat import get_article_list_from_account
 from applications.pipeline import CrawlerPipeline
-from applications.utils import timestamp_to_str, show_desc_to_sta, generate_gzh_id
+from applications.utils import timestamp_to_str, show_desc_to_sta
+from applications.utils import get_hot_titles, generate_gzh_id
 
 
 class CrawlerGzhConst:
@@ -102,14 +103,12 @@ class CrawlerGzhBaseStrategy(CrawlerPipeline, CrawlerGzhConst):
         position_list = [i for i in range(1, 9)]
         today_dt = date.today().isoformat()
         for position in position_list:
-            query = """
-                select read_cnt, from_unixtime(publish_time, '%Y-%m_%d') as publish_dt from crawler_meta_article
-                where out_account_id = %s and article_index = %s
-                order by publish_time desc limit %s;
+            query = f"""
+                select read_cnt, from_unixtime(publish_time, "%Y-%m_%d") as publish_dt from crawler_meta_article
+                where out_account_id = '{gh_id}' and article_index = {position}
+                order by publish_time desc limit {self.STAT_DURATION};
             """
-            fetch_response = await self.pool.async_fetch(
-                query=query, params=(gh_id, position, self.STAT_DURATION)
-            )
+            fetch_response = await self.pool.async_fetch(query=query)
             if fetch_response:
                 read_cnt_list = [i["read_cnt"] for i in fetch_response]
                 n = len(read_cnt_list)
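# A minimal sketch, not part of the patch above: the per-position read-count
# query can also stay parameterized, assuming the pool's async_fetch still
# accepts the params tuple used in the removed lines and does pymysql-style
# client-side substitution (%% keeps the literal percent signs of the
# from_unixtime format intact). The helper name fetch_read_stats is hypothetical.
async def fetch_read_stats(pool, gh_id: str, position: int, stat_duration: int):
    query = """
        select read_cnt, from_unixtime(publish_time, '%%Y-%%m_%%d') as publish_dt
        from crawler_meta_article
        where out_account_id = %s and article_index = %s
        order by publish_time desc limit %s;
    """
    # placeholders are bound by the driver, so gh_id needs no manual quoting
    return await pool.async_fetch(query=query, params=(gh_id, position, stat_duration))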
@@ -228,5 +227,21 @@ class CrawlerGzhSearchArticles(CrawlerGzhBaseStrategy):
     def __init__(self, pool, log_client, trace_id):
         super().__init__(pool, log_client, trace_id)
 
-    async def deal(self):
-        return {"mode": "search", "message": "still developing"}
+    async def search_each_title(self, title: str) -> None:
+        """search in weixin"""
+        search_response = await weixin_search(keyword=title)
+        print(search_response)
+
+    async def deal(self, date_string: str, strategy: str = "V1"):
+        hot_titles = await get_hot_titles(self.pool, date_string=date_string)
+        for hot_title in hot_titles:
+            await self.search_each_title(hot_title)
+#
+#
+# if __name__ == "__main__":
+#     import asyncio
+#     response = asyncio.run(weixin_search(keyword="南京照相馆"))
+#     print(json.dumps(response, ensure_ascii=False, indent=4))
+#
+#
+
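# A minimal usage sketch for the new search path, not part of the patch above;
# pool and log_client are whatever the project normally constructs, and the
# trace_id / date_string values below are placeholders.
import asyncio


async def run_search_demo(pool, log_client, date_string: str) -> None:
    crawler = CrawlerGzhSearchArticles(pool, log_client, trace_id="search-demo")
    # deal() loads the hot titles for date_string and searches each one in weixin
    await crawler.deal(date_string=date_string)


# asyncio.run(run_search_demo(pool, log_client, "2025-01-01"))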