@@ -182,7 +182,7 @@ class CrawlerToutiao(CrawlerPipeline, CrawlerToutiaoConst):
             else:
                 break
 
-    async def crawler_each_account(self, account_name, account_id, media_type):
+    async def crawler_each_account(self, account_name, account_id, media_type, cookie):
         """
         get toutiao account info
         """
@@ -193,17 +193,19 @@ class CrawlerToutiao(CrawlerPipeline, CrawlerToutiaoConst):
             "crawler_date": datetime.now().strftime("%Y-%m-%d"),
             "media_type": media_type,
         }
-        await self.log_client.log(
-            contents={
-                "task": "crawler_toutiao",
-                "function": "crawler_each_account",
-                "trace_id": self.trace_id,
-                "message": "抓取账号成功",
-                "status": "success",
-                "data": new_account_item,
-            }
+        # get title_list
+        response = await get_toutiao_account_info_list(
+            account_id=account_id,
+            cookie=cookie,
+            media_type="article"
         )
-        await self.save_item_to_database(media_type="account", item=new_account_item)
+        if not response:
+            return
+
+        article_raw_data = response["data"]
+        title_list = [i['title'] for i in article_raw_data]
+        new_account_item["title_list"] = json.dumps(title_list, ensure_ascii=False)
+        await self.save_item_to_database(media_type="account", item=new_account_item, trace_id=self.trace_id)
 
     async def crawler_each_article(self, method, article_raw_data, category=None):
         """
@@ -240,17 +242,7 @@ class CrawlerToutiao(CrawlerPipeline, CrawlerToutiaoConst):
                 }
             case _:
                 raise Exception(f"unknown method: {method}")
-        await self.log_client.log(
-            contents={
-                "task": "crawler_toutiao",
-                "function": "crawler_each_article",
-                "trace_id": self.trace_id,
-                "message": "抓取文章成功",
-                "status": "success",
-                "data": new_article_item,
-            }
-        )
-        await self.save_item_to_database(media_type="article", item=new_article_item)
+        await self.save_item_to_database(media_type="article", item=new_article_item, trace_id=self.trace_id)
 
     async def crawler_each_video(self, video_raw_data):
         pass
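
Note: this hunk and the account hunk above both drop the inline `log_client.log` success blocks in favor of passing `trace_id` into `save_item_to_database`. A hypothetical sketch of the intent, assuming the pipeline method now emits the success log itself; its real body is outside this diff, and `insert_item` below is an invented stand-in for the persistence step:

    # Hypothetical sketch on CrawlerPipeline: not the repo's actual implementation.
    async def save_item_to_database(self, media_type, item, trace_id):
        await self.insert_item(media_type, item)  # assumed persistence step
        await self.log_client.log(
            contents={
                "task": "crawler_toutiao",
                "trace_id": trace_id,
                "status": "success",
                "data": item,
            }
        )
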
@@ -416,6 +408,9 @@ class CrawlerToutiao(CrawlerPipeline, CrawlerToutiaoConst):
     # search for candidate accounts
     async def search_candidate_accounts(self):
         top_title_list = await get_top_article_title_list(pool=self.pool)
+        cookie = await self.get_config_value(
+            key="toutiao_blogger_cookie", output_type="string"
+        )
         for article in top_title_list:
             title = article["title"]
             try:
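
Note: hoisting the cookie read above the loop means one config lookup per run instead of one per candidate account; the same value is threaded through to `crawler_each_account` in the final hunk below. Assuming `get_config_value` behaves as the call site suggests, this is the per-iteration form the hunk avoids:

    # Avoided: a config lookup on every iteration of the candidate loop.
    for article in top_title_list:
        cookie = await self.get_config_value(
            key="toutiao_blogger_cookie", output_type="string"
        )
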
@@ -446,7 +441,7 @@ class CrawlerToutiao(CrawlerPipeline, CrawlerToutiaoConst):
             )
             if account_id:
                 await self.crawler_each_account(
-                    account_name, account_id, self.ARTICLE_TYPE
+                    account_name, account_id, self.ARTICLE_TYPE, cookie
                 )
 
             await asyncio.sleep(1)