wxSpiderApi.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import requests
  6. from applications.decoratorApi import retryOnNone
  7. class WeixinSpider(object):
  8. """
  9. Update account articles
  10. """
  11. ip = "8.217.190.241"
  12. # ip = "47.98.154.124"
  13. port = "8888"
  14. @classmethod
  15. @retryOnNone()
  16. def search_articles(cls, title):
  17. """
  18. search articles in wx
  19. :return:
  20. """
  21. url = "http://{}:{}/crawler/wei_xin/keyword".format(cls.ip, cls.port)
  22. payload = json.dumps({
  23. "keyword": title,
  24. "cursor": "1"
  25. })
  26. headers = {
  27. 'Content-Type': 'application/json'
  28. }
  29. response = requests.request("POST", url, headers=headers, data=payload)
  30. return response.json()
  31. @classmethod
  32. @retryOnNone()
  33. def get_article_text(cls, content_link):
  34. """
  35. 获取文章
  36. :param content_link:
  37. :return:
  38. """
  39. url = "http://{}:{}/crawler/wei_xin/detail".format(cls.ip, cls.port)
  40. payload = json.dumps({
  41. "content_link": content_link,
  42. "is_count": False,
  43. "is_ad": False
  44. })
  45. headers = {
  46. 'Content-Type': 'application/json'
  47. }
  48. response = requests.request("POST", url, headers=headers, data=payload)
  49. return response.json()
  50. @classmethod
  51. @retryOnNone()
  52. def update_msg_list(cls, ghId, index):
  53. """
  54. :return:
  55. """
  56. url = 'http://{}:{}/crawler/wei_xin/blogger'.format(cls.ip, cls.port)
  57. payload = {
  58. 'account_id': ghId,
  59. 'cursor': index,
  60. }
  61. headers = {
  62. 'Content-Type': 'application/json'
  63. }
  64. response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=120)
  65. # print("response", response.text)
  66. return response.json()
  67. @classmethod
  68. @retryOnNone()
  69. def get_account_by_url(cls, content_url):
  70. """
  71. 通过文章获取账号信息
  72. :param content_url:
  73. :return:
  74. """
  75. response = requests.request(
  76. "POST",
  77. url='http://{}:{}/crawler/wei_xin/account_info'.format(cls.ip, cls.port),
  78. headers={'Content-Type': 'application/json'},
  79. json={"content_link": content_url}
  80. )
  81. return response.json()