ソースを参照

2024-08-15
decoratorApi.py 新增请求超时装饰器
update_msg.py schedule 执行任务时启动新的线程
update_msg.py 所有包含requests请求的方法增加了超时装饰器和异常抛出
wxSpiderApi.py 增加了超时

罗俊辉 1 年間 前
コミット
f1a3f3af04
3 ファイル変更88 行追加11 行削除
  1. 35 0
      applications/decoratorApi.py
  2. 10 5
      applications/wxSpiderApi.py
  3. 43 6
      update_msg.py

+ 35 - 0
applications/decoratorApi.py

@@ -2,6 +2,7 @@
 @author: luojunhui
 """
 import time
+import requests
 
 
 def retryOnNone():
@@ -35,3 +36,37 @@ def retryOnNone():
         return wrapper
 
     return decorator
+
+
+def retryOnTimeout(retries=3, delay=2):
+    """
+    超时重试code
+    :param retries:
+    :param delay:
+    :return:
+    """
+    def decorator(func):
+        """
+        :param func:
+        :return:
+        """
+        def wrapper(*args, **kwargs):
+            """
+            :param args:
+            :param kwargs:
+            :return:
+            """
+            for attempt in range(retries):
+                try:
+                    return func(*args, **kwargs)
+                except requests.exceptions.Timeout:
+                    if attempt < retries - 1:
+                        time.sleep(delay)
+                        print(f"Timeout occurred, retrying... ({attempt + 1}/{retries})")
+                    else:
+                        print("Maximum retries reached. Function failed.")
+                        raise
+
+        return wrapper
+
+    return decorator

+ 10 - 5
applications/wxSpiderApi.py

@@ -4,7 +4,7 @@
 import json
 import requests
 
-from applications.decoratorApi import retryOnNone
+from applications.decoratorApi import retryOnNone, retryOnTimeout
 
 
 class WeixinSpider(object):
@@ -13,6 +13,7 @@ class WeixinSpider(object):
     """
 
     @classmethod
+    @retryOnTimeout()
     @retryOnNone()
     def search_articles(cls, title):
         """
@@ -28,10 +29,11 @@ class WeixinSpider(object):
             'Content-Type': 'application/json'
         }
 
-        response = requests.request("POST", url, headers=headers, data=payload)
+        response = requests.request("POST", url, headers=headers, data=payload, timeout=10)
         return response.json()
 
     @classmethod
+    @retryOnTimeout()
     @retryOnNone()
     def get_article_text(cls, content_link):
         """
@@ -48,10 +50,11 @@ class WeixinSpider(object):
         headers = {
             'Content-Type': 'application/json'
         }
-        response = requests.request("POST", url, headers=headers, data=payload)
+        response = requests.request("POST", url, headers=headers, data=payload, timeout=10)
         return response.json()
 
     @classmethod
+    @retryOnTimeout()
     @retryOnNone()
     def update_msg_list(cls, ghId, index):
         """
@@ -65,10 +68,11 @@ class WeixinSpider(object):
         headers = {
             'Content-Type': 'application/json'
         }
-        response = requests.post(url, headers=headers, data=json.dumps(payload))
+        response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=10)
         return response.json()
 
     @classmethod
+    @retryOnTimeout()
     @retryOnNone()
     def get_account_by_url(cls, content_url):
         """
@@ -80,6 +84,7 @@ class WeixinSpider(object):
             "POST",
             url='http://8.217.190.241:8888/crawler/wei_xin/account_info',
             headers={'Content-Type': 'application/json'},
-            json={"content_link": content_url}
+            json={"content_link": content_url},
+            timeout=10
         )
         return response.json()

+ 43 - 6
update_msg.py

@@ -4,6 +4,7 @@
 
 import time
 import json
+import threading
 
 import requests
 import schedule
@@ -12,6 +13,7 @@ from datetime import datetime
 
 from config import accountBaseInfo
 from applications import PQMySQL, WeixinSpider, Functions
+from applications.decoratorApi import retryOnTimeout
 
 
 class UpdateMsgDaily(object):
@@ -37,6 +39,7 @@ class UpdateMsgDaily(object):
         return gh_id_dict
 
     @classmethod
+    @retryOnTimeout()
     def bot(cls, account_list):
         """
         机器人
@@ -67,7 +70,7 @@ class UpdateMsgDaily(object):
                 "header": {"title": {"content": "【重点关注】", "tag": "plain_text"}},
             },
         }
-        requests.request("POST", url=url, headers=headers, data=json.dumps(payload))
+        requests.request("POST", url=url, headers=headers, data=json.dumps(payload), timeout=10)
 
     @classmethod
     def findAccountLatestUpdateTime(cls, gh_id):
@@ -191,7 +194,18 @@ class UpdateMsgDaily(object):
         输入ghid获取账号的文章list
         :return:
         """
-        response = cls.spider.update_msg_list(ghId=gh_id, index=cursor)
+        try:
+            response = cls.spider.update_msg_list(ghId=gh_id, index=cursor)
+        except Exception as e:
+            response = {
+                "error": str(e),
+                "info": "更新文章接口请求失败",
+                "gh_id": gh_id,
+                "time": datetime.now().__str__()
+            }
+            # 之后可以考虑抛出阿里云日志
+            print(response)
+            return
         msg_list = response.get("data", {}).get("data")
         if msg_list:
             last_article_in_this_msg = msg_list[-1]
@@ -200,7 +214,17 @@ class UpdateMsgDaily(object):
             ]["UpdateTime"]
             last_url = last_article_in_this_msg["AppMsg"]["DetailInfo"][0]["ContentUrl"]
             # 校验是否抓到的是同一个账号
-            resdata = cls.spider.get_account_by_url(last_url)
+            try:
+                resdata = cls.spider.get_account_by_url(last_url)
+            except Exception as e:
+                resdata = {
+                    "error": str(e),
+                    "info": "通过链接获取账号信息失败",
+                    "gh_id": gh_id,
+                    "time": datetime.now().__str__()
+                }
+                print(resdata)
+                return
             check_name = resdata["data"].get("data", {}).get("account_name")
             check_id = resdata["data"].get("data", {}).get("wx_gh")
             if check_id == gh_id:
@@ -294,14 +318,27 @@ class UpdateMsgDaily(object):
                 name = account_dict[account_id]
                 error_account_list.append(name)
         if error_account_list:
-            cls.bot(error_account_list)
+            try:
+                cls.bot(error_account_list)
+            except Exception as e:
+                print("Timeout Error: {}".format(e))
+
+
+def job_with_thread(job_func):
+    """
+    每个任务放到单个线程中
+    :param job_func:
+    :return:
+    """
+    job_thread = threading.Thread(target=job_func)
+    job_thread.start()
 
 
 if __name__ == "__main__":
     UMD = UpdateMsgDaily()
 
-    schedule.every().day.at("21:00").do(UMD.updateJob)
-    schedule.every().day.at("21:30").do(UMD.checkJob)
+    schedule.every().day.at("21:00").do(job_with_thread, UMD.updateJob)
+    schedule.every().day.at("21:30").do(job_with_thread, UMD.checkJob)
 
     while True:
         schedule.run_pending()