wx_spider_api.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import time
  6. import requests
  7. def retryOnNone():
  8. """
  9. 基于None类型数据的重试装饰器
  10. :return:
  11. """
  12. def decorator(func):
  13. """
  14. :param func:
  15. :return:
  16. """
  17. max_retries = 5
  18. wait_seconds = 1
  19. def wrapper(*args, **kwargs):
  20. """
  21. :param args:
  22. :param kwargs:
  23. :return:
  24. """
  25. for attempt in range(max_retries):
  26. response = func(*args, **kwargs)
  27. if response['data'] is not None:
  28. return response
  29. time.sleep(wait_seconds)
  30. return None
  31. return wrapper
  32. return decorator
  33. class WeixinSpider(object):
  34. """
  35. Update account articles
  36. """
  37. @classmethod
  38. @retryOnNone()
  39. def search_articles(cls, title):
  40. """
  41. search articles in wx
  42. :return:
  43. """
  44. url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
  45. payload = json.dumps({
  46. "keyword": title,
  47. "cursor": "1"
  48. })
  49. headers = {
  50. 'Content-Type': 'application/json'
  51. }
  52. response = requests.request("POST", url, headers=headers, data=payload)
  53. return response.json()
  54. @classmethod
  55. @retryOnNone()
  56. def get_article_text(cls, content_link):
  57. """
  58. 获取文章
  59. :param content_link:
  60. :return:
  61. """
  62. url = "http://8.217.190.241:8888/crawler/wei_xin/detail"
  63. payload = json.dumps({
  64. "content_link": content_link,
  65. "is_count": False,
  66. "is_ad": False
  67. })
  68. headers = {
  69. 'Content-Type': 'application/json'
  70. }
  71. response = requests.request("POST", url, headers=headers, data=payload)
  72. return response.json()
  73. @classmethod
  74. @retryOnNone()
  75. def update_msg_list(cls, ghId, index):
  76. """
  77. :return:
  78. """
  79. url = 'http://8.217.190.241:8888/crawler/wei_xin/blogger'
  80. payload = {
  81. 'account_id': ghId,
  82. 'cursor': index,
  83. }
  84. headers = {
  85. 'Content-Type': 'application/json'
  86. }
  87. response = requests.post(url, headers=headers, data=json.dumps(payload))
  88. return response.json()
  89. @classmethod
  90. @retryOnNone()
  91. def get_account_by_url(cls, content_url):
  92. """
  93. 通过文章获取账号信息
  94. :param content_url:
  95. :return:
  96. """
  97. response = requests.request(
  98. "POST",
  99. url='http://8.217.190.241:8888/crawler/wei_xin/account_info',
  100. headers={'Content-Type': 'application/json'},
  101. json={"content_link": content_url}
  102. )
  103. return response.json()