"""
@author: luojunhui
"""
import json
import time

import requests

from applications import log
from applications.decoratorApi import retryOnNone
class WeixinSpider(object):
    """
    Client for the WeChat (wei_xin) crawler HTTP API; used to update
    account articles.

    Methods are classmethods that POST a JSON body and return the
    JSON-decoded response.
    """
    # Commented-out host/port values (presumably earlier endpoints --
    # confirm before removing):
    # ip = "8.217.190.241"
    # ip = "47.98.154.124"
    # port = "8888"
    # Root URL to which the endpoint paths are appended.
    base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"
    # Headers sent with each request; request bodies are JSON.
    headers = {
        "Content-Type": "application/json"
    }
  20. @classmethod
  21. @retryOnNone()
  22. def search_articles(cls, title, page="1") -> dict:
  23. """
  24. search articles in wx
  25. :return:
  26. """
  27. url = "{}/keyword".format(cls.base_url)
  28. payload = json.dumps({
  29. "keyword": title,
  30. "cursor": page
  31. })
  32. response = requests.request("POST", url, headers=cls.headers, data=payload, timeout=120)
  33. return response.json()
  34. @classmethod
  35. # @retryOnNone()
  36. def get_article_text(cls, content_link, is_count=False, is_cache=True) -> dict:
  37. """
  38. 获取文章
  39. :param is_cache:
  40. :param is_count:
  41. :param content_link:
  42. :return:
  43. """
  44. url = "{}/detail".format(cls.base_url)
  45. payload = json.dumps({
  46. "content_link": content_link,
  47. "is_count": is_count,
  48. "is_ad": False,
  49. "is_cache": is_cache
  50. })
  51. response = requests.request("POST", url, headers=cls.headers, data=payload, timeout=120)
  52. return response.json()
  53. @classmethod
  54. @retryOnNone()
  55. def update_msg_list(cls, ghId, index) -> dict:
  56. """
  57. :return:
  58. """
  59. url = '{}/blogger'.format(cls.base_url)
  60. payload = {
  61. 'account_id': ghId,
  62. 'cursor': index,
  63. }
  64. response = requests.post(url=url, headers=cls.headers, data=json.dumps(payload), timeout=120)
  65. return response.json()
  66. @classmethod
  67. @retryOnNone()
  68. def get_account_by_url(cls, content_url) -> dict:
  69. """
  70. 通过文章获取账号信息
  71. :param content_url:
  72. :return:
  73. """
  74. url = '{}/account_info'.format(cls.base_url)
  75. data = {"content_link": content_url}
  76. response = requests.request("POST", url=url, headers=cls.headers, json=data, timeout=120)
  77. return response.json()
  78. @classmethod
  79. def get_recommend_articles(cls, content_link) -> dict:
  80. """
  81. use content link to get recommend articles
  82. :param content_link:
  83. :return:
  84. """
  85. url = "{}/recommend".format(cls.base_url)
  86. payload = json.dumps(
  87. {"content_link": content_link}
  88. )
  89. response = requests.request("POST", url=url, headers=cls.headers, data=payload, timeout=120)
  90. response_json = response.json()
  91. if response_json['code'] != 0:
  92. return cls.get_recommend_articles(content_link)
  93. time.sleep(3)
  94. return response.json()
  95. @classmethod
  96. def get_recommend_articles_v2(cls, content_link) -> dict:
  97. """
  98. use content link to get recommend articles
  99. :param content_link:
  100. :return:
  101. """
  102. url = 'http://datapi.top/wxapi/relatedarticle'
  103. payload = json.dumps(
  104. {
  105. "content_link": content_link,
  106. "token": "401e4d3c85068bb5"
  107. }
  108. )
  109. response = requests.request("POST", url=url, headers=cls.headers, data=payload, timeout=120)
  110. log(
  111. task="article_association_crawler",
  112. function="get_recommend_articles_v2",
  113. message="获取推荐链接,付费接口",
  114. data={
  115. "content_link": content_link,
  116. "response": response.json()
  117. }
  118. )
  119. time.sleep(3)
  120. return response.json()