wxSpiderApi.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import requests
  6. from applications.decoratorApi import retryOnNone
  7. class WeixinSpider(object):
  8. """
  9. Update account articles
  10. """
  11. # ip = "8.217.190.241"
  12. # ip = "47.98.154.124"
  13. # port = "8888"
  14. base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"
  15. headers = {
  16. "Content-Type": "application/json"
  17. }
  18. @classmethod
  19. @retryOnNone()
  20. def search_articles(cls, title, page="1") -> dict:
  21. """
  22. search articles in wx
  23. :return:
  24. """
  25. url = "{}/keyword".format(cls.base_url)
  26. payload = json.dumps({
  27. "keyword": title,
  28. "cursor": page
  29. })
  30. response = requests.request("POST", url, headers=cls.headers, data=payload, timeout=120)
  31. return response.json()
  32. @classmethod
  33. @retryOnNone()
  34. def get_article_text(cls, content_link, is_count=False, is_cache=True) -> dict:
  35. """
  36. 获取文章
  37. :param is_cache:
  38. :param is_count:
  39. :param content_link:
  40. :return:
  41. """
  42. url = "{}/detail".format(cls.base_url)
  43. payload = json.dumps({
  44. "content_link": content_link,
  45. "is_count": is_count,
  46. "is_ad": False,
  47. "is_cache": is_cache
  48. })
  49. response = requests.request("POST", url, headers=cls.headers, data=payload, timeout=120)
  50. return response.json()
  51. @classmethod
  52. @retryOnNone()
  53. def update_msg_list(cls, ghId, index) -> dict:
  54. """
  55. :return:
  56. """
  57. url = '{}/blogger'.format(cls.base_url)
  58. payload = {
  59. 'account_id': ghId,
  60. 'cursor': index,
  61. }
  62. response = requests.post(url=url, headers=cls.headers, data=json.dumps(payload), timeout=120)
  63. return response.json()
  64. @classmethod
  65. @retryOnNone()
  66. def get_account_by_url(cls, content_url) -> dict:
  67. """
  68. 通过文章获取账号信息
  69. :param content_url:
  70. :return:
  71. """
  72. url = '{}/account_info'.format(cls.base_url)
  73. data = {"content_link": content_url}
  74. response = requests.request("POST", url=url, headers=cls.headers, json=data, timeout=120)
  75. return response.json()
  76. @classmethod
  77. def get_recommend_articles(cls, content_link) -> dict:
  78. """
  79. use content link to get recommend articles
  80. :param content_link:
  81. :return:
  82. """
  83. url = "{}/recommend".format(cls.base_url)
  84. payload = json.dumps(
  85. {"content_link": content_link}
  86. )
  87. response = requests.request("POST", url=url, headers=cls.headers, data=payload, timeout=120)
  88. return response.json()