wxSpiderApi.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import requests
  6. from applications.decoratorApi import retryOnNone
  7. class WeixinSpider(object):
  8. """
  9. Update account articles
  10. """
  11. # ip = "8.217.190.241"
  12. ip = "47.98.154.124"
  13. port = "8888"
  14. @classmethod
  15. @retryOnNone()
  16. def search_articles(cls, title):
  17. """
  18. search articles in wx
  19. :return:
  20. """
  21. url = "http://{}:{}/crawler/wei_xin/keyword".format(cls.ip, cls.port)
  22. payload = json.dumps({
  23. "keyword": title,
  24. "cursor": "1"
  25. })
  26. headers = {
  27. 'Content-Type': 'application/json'
  28. }
  29. response = requests.request("POST", url, headers=headers, data=payload)
  30. return response.json()
  31. @classmethod
  32. @retryOnNone()
  33. def get_article_text(cls, content_link):
  34. """
  35. 获取文章
  36. :param content_link:
  37. :return:
  38. """
  39. url = "http://{}:{}/crawler/wei_xin/detail".format(cls.ip, cls.port)
  40. payload = json.dumps({
  41. "content_link": content_link,
  42. "is_count": False,
  43. "is_ad": False
  44. })
  45. headers = {
  46. 'Content-Type': 'application/json'
  47. }
  48. response = requests.request("POST", url, headers=headers, data=payload)
  49. return response.json()
  50. @classmethod
  51. @retryOnNone()
  52. def update_msg_list(cls, ghId, index):
  53. """
  54. :return:
  55. """
  56. url = 'http://{}:{}/crawler/wei_xin/blogger'.format(cls.ip, cls.port)
  57. payload = {
  58. 'account_id': ghId,
  59. 'cursor': index,
  60. }
  61. headers = {
  62. 'Content-Type': 'application/json'
  63. }
  64. response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=120)
  65. return response.json()
  66. @classmethod
  67. @retryOnNone()
  68. def get_account_by_url(cls, content_url):
  69. """
  70. 通过文章获取账号信息
  71. :param content_url:
  72. :return:
  73. """
  74. response = requests.request(
  75. "POST",
  76. url='http://{}:{}/crawler/wei_xin/account_info'.format(cls.ip, cls.port),
  77. headers={'Content-Type': 'application/json'},
  78. json={"content_link": content_url}
  79. )
  80. return response.json()