123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 |
- """
- @author: luojunhui
- """
- import json
- import time
- import requests
def retryOnNone(max_retries=5, wait_seconds=1):
    """
    Build a decorator that retries a call while its response carries no data.

    The decorated callable is invoked up to ``max_retries`` times. An attempt
    succeeds when ``response['data']`` exists and is not ``None``; that
    response is returned unchanged. A response that is ``None``, is not
    subscriptable by key, or has no ``'data'`` entry counts as "no data" and
    triggers another attempt instead of raising.

    Exceptions raised by the decorated callable itself are not caught; they
    propagate to the caller.

    :param max_retries: maximum number of attempts (default 5)
    :param wait_seconds: pause between two attempts, in seconds (default 1)
    :return: a decorator; the decorated callable returns the first response
        with non-None data, or ``None`` when every attempt came back empty
    """
    # Local import so this block does not depend on the file's import header.
    import functools

    def decorator(func):
        """
        Wrap ``func`` with the retry loop.

        :param func: callable returning a mapping with a ``'data'`` entry
        :return: the retrying wrapper
        """

        @functools.wraps(func)  # keep the name/docstring of the wrapped callable
        def wrapper(*args, **kwargs):
            """
            Call ``func`` until it yields data or the attempts run out.

            :param args: positional arguments forwarded to ``func``
            :param kwargs: keyword arguments forwarded to ``func``
            :return: the first response with non-None data, else ``None``
            """
            for attempt in range(max_retries):
                response = func(*args, **kwargs)
                try:
                    data = response['data']
                except (KeyError, TypeError, IndexError):
                    # Malformed or missing payload: treat it like empty data.
                    data = None
                if data is not None:
                    return response
                # Only wait when another attempt will actually follow.
                if attempt < max_retries - 1:
                    time.sleep(wait_seconds)
            return None

        return wrapper

    return decorator
class WeixinSpider(object):
    """
    Client for the "wei_xin" crawler HTTP service.

    Each public method POSTs a JSON payload to one endpoint of the service
    and returns the decoded JSON body. All public methods are wrapped by
    ``retryOnNone``, so a call is repeated while the response's ``data``
    field is ``None`` and ``None`` is returned once the retries are used up.
    """

    # Root of all crawler endpoints used by this class.
    BASE_URL = "http://8.217.190.241:8888/crawler/wei_xin"

    # (connect, read) timeout in seconds. Without a timeout ``requests`` waits
    # forever on a stalled server. NOTE(review): the read limit is a generous
    # guess - raise it if the crawler legitimately needs longer.
    REQUEST_TIMEOUT = (10, 120)

    @classmethod
    def _post(cls, endpoint, payload):
        """
        POST ``payload`` as JSON to ``endpoint`` and decode the JSON reply.

        :param endpoint: path segment appended to ``BASE_URL``
        :param payload: JSON-serialisable request body
        :return: the decoded JSON response
        :raises requests.exceptions.RequestException: on connection errors
            or when ``REQUEST_TIMEOUT`` is exceeded
        """
        response = requests.post(
            "{}/{}".format(cls.BASE_URL, endpoint),
            headers={'Content-Type': 'application/json'},
            data=json.dumps(payload),
            timeout=cls.REQUEST_TIMEOUT,
        )
        return response.json()

    @classmethod
    @retryOnNone()
    def search_articles(cls, title):
        """
        Search articles by keyword; always requests cursor "1".

        :param title: keyword sent to the search endpoint
        :return: decoded JSON response, or None when no data was returned
        """
        return cls._post("keyword", {"keyword": title, "cursor": "1"})

    @classmethod
    @retryOnNone()
    def get_article_text(cls, content_link):
        """
        Fetch an article through the detail endpoint.

        :param content_link: link of the article to fetch
        :return: decoded JSON response, or None when no data was returned
        """
        return cls._post(
            "detail",
            {
                "content_link": content_link,
                "is_count": False,
                "is_ad": False,
            },
        )

    @classmethod
    @retryOnNone()
    def update_msg_list(cls, ghId, index):
        """
        Query the blogger endpoint for an account at a given cursor.

        :param ghId: value sent as ``account_id``
        :param index: value sent as ``cursor``
        :return: decoded JSON response, or None when no data was returned
        """
        return cls._post("blogger", {'account_id': ghId, 'cursor': index})

    @classmethod
    @retryOnNone()
    def get_account_by_url(cls, content_url):
        """
        Get account information from an article link.

        :param content_url: article link, sent as ``content_link``
        :return: decoded JSON response, or None when no data was returned
        """
        return cls._post("account_info", {"content_link": content_url})
|