| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 | 
							- """
 
- @author: luojunhui
 
- """
 
- import json
 
- import time
 
- import requests
 
def retryOnNone(max_retries=5, wait_seconds=1):
    """
    Build a decorator that retries a call until its response carries data.

    The decorated function is expected to return a dict with a ``data`` key.
    An attempt counts as failed when the return value is not a dict, has no
    ``data`` key, or has ``data`` set to None.

    :param max_retries: maximum number of times the function is called
    :param wait_seconds: pause between two consecutive attempts, in seconds
    :return: a decorator; the wrapped function returns the first response
        whose ``data`` is not None, or None when every attempt failed
    """
    # Function-scope import so this block does not depend on the file header.
    import functools

    def decorator(func):
        """
        :param func: callable returning a dict with a ``data`` key
        :return: ``func`` wrapped with the retry loop
        """

        # wraps() keeps the name and docstring of the decorated function.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries):
                response = func(*args, **kwargs)
                # A malformed response (not a dict, or no 'data' key) is
                # treated like an empty one instead of raising.
                if isinstance(response, dict) and response.get('data') is not None:
                    return response
                # No point in sleeping once the last attempt has failed.
                if attempt < max_retries - 1:
                    time.sleep(wait_seconds)
            return None

        return wrapper

    return decorator
 
class WeixinSpider(object):
    """
    Client for the "wei_xin" crawler HTTP service.

    Each public method POSTs a JSON payload to one endpoint of the service
    and returns the decoded JSON response. Every method is decorated with
    ``retryOnNone``.
    """

    # Root URL of the crawler service; endpoint names are appended to it.
    BASE_URL = "http://8.217.190.241:8888/crawler/wei_xin"

    # Seconds to wait for the service before requests raises a timeout.
    # Without a timeout a stalled service would block the caller forever.
    # NOTE(review): 120 is a generous guess; tune to the real latency.
    REQUEST_TIMEOUT = 120

    @classmethod
    def _post(cls, endpoint, payload):
        """
        POST ``payload`` as JSON to one endpoint and decode the JSON reply.

        :param endpoint: last path segment of the endpoint, e.g. ``keyword``
        :param payload: JSON-serialisable dict sent as the request body
        :return: the decoded JSON body of the response
        """
        response = requests.post(
            "{}/{}".format(cls.BASE_URL, endpoint),
            headers={'Content-Type': 'application/json'},
            data=json.dumps(payload),
            timeout=cls.REQUEST_TIMEOUT,
        )
        return response.json()

    @classmethod
    @retryOnNone()
    def search_articles(cls, title):
        """
        Search articles by keyword.

        :param title: text sent as the ``keyword`` field
        :return: decoded JSON response of the ``keyword`` endpoint
        """
        # The cursor is fixed to "1" (presumably the first result page).
        return cls._post("keyword", {"keyword": title, "cursor": "1"})

    @classmethod
    @retryOnNone()
    def get_article_text(cls, content_link):
        """
        Fetch the detail of one article.

        :param content_link: link of the article to fetch
        :return: decoded JSON response of the ``detail`` endpoint
        """
        return cls._post(
            "detail",
            {
                "content_link": content_link,
                "is_count": False,
                "is_ad": False,
            },
        )

    @classmethod
    @retryOnNone()
    def update_msg_list(cls, ghId, index):
        """
        Fetch the message list of an account.

        :param ghId: value sent as the ``account_id`` field
        :param index: value sent as the ``cursor`` field
        :return: decoded JSON response of the ``blogger`` endpoint
        """
        return cls._post("blogger", {'account_id': ghId, 'cursor': index})

    @classmethod
    @retryOnNone()
    def get_account_by_url(cls, content_url):
        """
        Look up account information from an article link.

        :param content_url: article link, sent as the ``content_link`` field
        :return: decoded JSON response of the ``account_info`` endpoint
        """
        return cls._post("account_info", {"content_link": content_url})
 
 
  |