فهرست منبع

公众号抓取优化

luojunhui 3 ماه پیش
والد
کامیت
428407853e
1 فایل تغییر یافته به همراه 33 افزوده شده و 7 حذف شده
  1. 33 7
      applications/crawler/wechat/gzh_spider.py

+ 33 - 7
applications/crawler/wechat/gzh_spider.py

@@ -33,8 +33,17 @@ async def get_article_detail(
             "is_cache": is_cache,
         }
     )
-    async with AsyncHttpClient(timeout=10) as http_client:
-        response = await http_client.post(target_url, headers=headers, data=payload)
+    try:
+        async with AsyncHttpClient(timeout=10) as http_client:
+            response = await http_client.post(target_url, headers=headers, data=payload)
+    except Exception as e:
+        log(
+            task="get_article_detail",
+            function="get_article_detail",
+            message=f"API请求失败: {e}",
+            data={"link": article_link},
+        )
+        return None
 
     return response
 
@@ -43,8 +52,17 @@ async def get_article_detail(
 async def get_article_list_from_account(account_id: str, index=None) -> dict | None:
     target_url = f"{base_url}/blogger"
     payload = json.dumps({"account_id": account_id, "cursor": index})
-    async with AsyncHttpClient(timeout=120) as http_client:
-        response = await http_client.post(target_url, headers=headers, data=payload)
+    try:
+        async with AsyncHttpClient(timeout=120) as http_client:
+            response = await http_client.post(target_url, headers=headers, data=payload)
+    except Exception as e:
+        log(
+            task="get_article_list_from_account",
+            function="get_article_list_from_account",
+            message=f"API请求失败: {e}",
+            data={"account_id": account_id, "index": index},
+        )
+        return None
     return response
 
 
@@ -93,7 +111,15 @@ def get_source_account_from_article(article_link) -> dict | None:
 async def weixin_search(keyword: str, page="1") -> dict | None:
     url = "{}/keyword".format(base_url)
     payload = json.dumps({"keyword": keyword, "cursor": page})
-    async with AsyncHttpClient(timeout=120) as http_client:
-        response = await http_client.post(url=url, headers=headers, data=payload)
-
+    try:
+        async with AsyncHttpClient(timeout=120) as http_client:
+            response = await http_client.post(url=url, headers=headers, data=payload)
+    except Exception as e:
+        log(
+            task="weixin_search",
+            function="weixin_search",
+            message=f"API请求失败: {e}",
+            data={"keyword": keyword, "page": page},
+        )
+        return None
     return response