wxSpiderApi.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import time
  6. import requests
  7. from applications.decoratorApi import retryOnNone
  8. class WeixinSpider(object):
  9. """
  10. Update account articles
  11. """
  12. # ip = "8.217.190.241"
  13. # ip = "47.98.154.124"
  14. # port = "8888"
  15. base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"
  16. headers = {
  17. "Content-Type": "application/json"
  18. }
  19. @classmethod
  20. @retryOnNone()
  21. def search_articles(cls, title, page="1") -> dict:
  22. """
  23. search articles in wx
  24. :return:
  25. """
  26. url = "{}/keyword".format(cls.base_url)
  27. payload = json.dumps({
  28. "keyword": title,
  29. "cursor": page
  30. })
  31. response = requests.request("POST", url, headers=cls.headers, data=payload, timeout=120)
  32. return response.json()
  33. @classmethod
  34. # @retryOnNone()
  35. def get_article_text(cls, content_link, is_count=False, is_cache=True) -> dict:
  36. """
  37. 获取文章
  38. :param is_cache:
  39. :param is_count:
  40. :param content_link:
  41. :return:
  42. """
  43. url = "{}/detail".format(cls.base_url)
  44. payload = json.dumps({
  45. "content_link": content_link,
  46. "is_count": is_count,
  47. "is_ad": False,
  48. "is_cache": is_cache
  49. })
  50. response = requests.request("POST", url, headers=cls.headers, data=payload, timeout=120)
  51. return response.json()
  52. @classmethod
  53. @retryOnNone()
  54. def update_msg_list(cls, ghId, index) -> dict:
  55. """
  56. :return:
  57. """
  58. url = '{}/blogger'.format(cls.base_url)
  59. payload = {
  60. 'account_id': ghId,
  61. 'cursor': index,
  62. }
  63. response = requests.post(url=url, headers=cls.headers, data=json.dumps(payload), timeout=120)
  64. return response.json()
  65. @classmethod
  66. @retryOnNone()
  67. def get_account_by_url(cls, content_url) -> dict:
  68. """
  69. 通过文章获取账号信息
  70. :param content_url:
  71. :return:
  72. """
  73. url = '{}/account_info'.format(cls.base_url)
  74. data = {"content_link": content_url}
  75. response = requests.request("POST", url=url, headers=cls.headers, json=data, timeout=120)
  76. return response.json()
  77. @classmethod
  78. def get_recommend_articles(cls, content_link) -> dict:
  79. """
  80. use content link to get recommend articles
  81. :param content_link:
  82. :return:
  83. """
  84. url = "{}/recommend".format(cls.base_url)
  85. payload = json.dumps(
  86. {"content_link": content_link}
  87. )
  88. response = requests.request("POST", url=url, headers=cls.headers, data=payload, timeout=120)
  89. response_json = response.json()
  90. if response_json['code'] != 0:
  91. return cls.get_recommend_articles(content_link)
  92. time.sleep(3)
  93. return response.json()
  94. @classmethod
  95. def get_recommend_articles_v2(cls, content_link) -> dict:
  96. """
  97. use content link to get recommend articles
  98. :param content_link:
  99. :return:
  100. """
  101. url = 'http://datapi.top/wxapi/relatedarticle'
  102. payload = json.dumps(
  103. {
  104. "content_link": content_link,
  105. "token": "401e4d3c85068bb5"
  106. }
  107. )
  108. response = requests.request("POST", url=url, headers=cls.headers, data=payload, timeout=120)
  109. time.sleep(3)
  110. return response.json()