Bladeren bron

spider 代码,使用域名而不是 ip+port

新增 get_recommend_articles 方法
luojunhui 8 maanden geleden
bovenliggende
commit
664f1e406c
1 gewijzigd bestand met 34 toevoegingen en 31 verwijderingen
  1. 34 31
      applications/wxSpiderApi.py

+ 34 - 31
applications/wxSpiderApi.py

@@ -11,80 +11,83 @@ class WeixinSpider(object):
     """
     Update account articles
     """
-    ip = "8.217.190.241"
+    # ip = "8.217.190.241"
     # ip = "47.98.154.124"
-    port = "8888"
+    # port = "8888"
+    base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"
+    headers = {
+        "Content-Type": "application/json"
+    }
 
     @classmethod
     @retryOnNone()
-    def search_articles(cls, title):
+    def search_articles(cls, title) -> dict:
         """
         search articles in wx
         :return:
         """
-        url = "http://{}:{}/crawler/wei_xin/keyword".format(cls.ip, cls.port)
+        url = "{}/keyword".format(cls.base_url)
         payload = json.dumps({
             "keyword": title,
             "cursor": "1"
         })
-        headers = {
-            'Content-Type': 'application/json'
-        }
-
-        response = requests.request("POST", url, headers=headers, data=payload)
+        response = requests.request("POST", url, headers=cls.headers, data=payload)
         return response.json()
 
     @classmethod
-    # @retryOnNone()
-    def get_article_text(cls, content_link):
+    def get_article_text(cls, content_link, is_count=False) -> dict:
         """
         获取文章
+        :param is_count:
         :param content_link:
         :return:
         """
-        url = "http://{}:{}/crawler/wei_xin/detail".format(cls.ip, cls.port)
+        url = "{}/detail".format(cls.base_url)
         payload = json.dumps({
             "content_link": content_link,
-            "is_count": False,
+            "is_count": is_count,
             "is_ad": False
         })
-        headers = {
-            'Content-Type': 'application/json'
-        }
-        response = requests.request("POST", url, headers=headers, data=payload)
+        response = requests.request("POST", url, headers=cls.headers, data=payload)
         return response.json()
 
     @classmethod
     @retryOnNone()
-    def update_msg_list(cls, ghId, index):
+    def update_msg_list(cls, ghId, index) -> dict:
         """
         :return:
         """
-        url = 'http://{}:{}/crawler/wei_xin/blogger'.format(cls.ip, cls.port)
+        url = '{}/blogger'.format(cls.base_url)
         payload = {
             'account_id': ghId,
             'cursor': index,
         }
-        headers = {
-            'Content-Type': 'application/json'
-        }
-        response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=120)
-        # print("response", response.text)
+        response = requests.post(url=url, headers=cls.headers, data=json.dumps(payload), timeout=120)
         return response.json()
 
     @classmethod
     @retryOnNone()
-    def get_account_by_url(cls, content_url):
+    def get_account_by_url(cls, content_url) -> dict:
         """
         通过文章获取账号信息
         :param content_url:
         :return:
         """
-        response = requests.request(
-            "POST",
-            url='http://{}:{}/crawler/wei_xin/account_info'.format(cls.ip, cls.port),
-            headers={'Content-Type': 'application/json'},
-            json={"content_link": content_url}
-        )
+        url = '{}/account_info'.format(cls.base_url)
+        data = {"content_link": content_url}
+        response = requests.request("POST", url=url, headers=cls.headers, json=data, timeout=120)
         return response.json()
 
+    @classmethod
+    def get_recommend_articles(cls, content_link) -> dict:
+        """
+        use content link to get recommend articles
+        :param content_link:
+        :return:
+        """
+        url = "{}/recommend".format(cls.base_url)
+        payload = json.dumps(
+            {"content_link": content_link}
+        )
+        response = requests.request("POST", url=url, headers=cls.headers, data=payload, timeout=120)
+        return response.json()