|
@@ -134,7 +134,7 @@ class CrawlerGzhBaseStrategy(CrawlerPipeline, CrawlerGzhConst):
|
|
|
await self.save_item_to_database(
|
|
|
media_type="article", item=new_item, trace_id=self.trace_id
|
|
|
)
|
|
|
- await asyncio.sleep(self.SLEEP_SECONDS)
|
|
|
+ # await asyncio.sleep(self.SLEEP_SECONDS)
|
|
|
|
|
|
async def update_account_read_avg_info(self, gh_id, account_name):
|
|
|
"""update account read avg info"""
|
|
@@ -288,7 +288,8 @@ class CrawlerGzhSearchArticles(CrawlerGzhBaseStrategy):
|
|
|
"""
|
|
|
@description: 对于搜索到的文章list,获取文章详情, 并且存储到meta表中
|
|
|
"""
|
|
|
- for article in article_list:
|
|
|
+ for article in tqdm(article_list, desc="获取搜索结果详情"):
|
|
|
+ print(f"{datetime.now()}: start crawling article: {article}")
|
|
|
url = article["url"]
|
|
|
detail_response = await get_article_detail(url, is_count=True, is_cache=False)
|
|
|
if not detail_response:
|