# wxSpiderApi.py (2.2 KB)
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import requests
  6. from applications.decoratorApi import retryOnNone
  7. class WeixinSpider(object):
  8. """
  9. Update account articles
  10. """
  11. @classmethod
  12. @retryOnNone()
  13. def search_articles(cls, title):
  14. """
  15. search articles in wx
  16. :return:
  17. """
  18. url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
  19. payload = json.dumps({
  20. "keyword": title,
  21. "cursor": "1"
  22. })
  23. headers = {
  24. 'Content-Type': 'application/json'
  25. }
  26. response = requests.request("POST", url, headers=headers, data=payload)
  27. return response.json()
  28. @classmethod
  29. @retryOnNone()
  30. def get_article_text(cls, content_link):
  31. """
  32. 获取文章
  33. :param content_link:
  34. :return:
  35. """
  36. url = "http://47.98.154.124:8888/crawler/wei_xin/detail"
  37. payload = json.dumps({
  38. "content_link": content_link,
  39. "is_count": False,
  40. "is_ad": False
  41. })
  42. headers = {
  43. 'Content-Type': 'application/json'
  44. }
  45. # print(url)
  46. # print(payload)
  47. response = requests.request("POST", url, headers=headers, data=payload)
  48. print("info", response.text)
  49. return response.json()
  50. @classmethod
  51. @retryOnNone()
  52. def update_msg_list(cls, ghId, index):
  53. """
  54. :return:
  55. """
  56. url = 'http://8.217.190.241:8888/crawler/wei_xin/blogger'
  57. payload = {
  58. 'account_id': ghId,
  59. 'cursor': index,
  60. }
  61. print(payload)
  62. headers = {
  63. 'Content-Type': 'application/json'
  64. }
  65. response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=120)
  66. return response.json()
  67. @classmethod
  68. @retryOnNone()
  69. def get_account_by_url(cls, content_url):
  70. """
  71. 通过文章获取账号信息
  72. :param content_url:
  73. :return:
  74. """
  75. response = requests.request(
  76. "POST",
  77. url='http://8.217.190.241:8888/crawler/wei_xin/account_info',
  78. headers={'Content-Type': 'application/json'},
  79. json={"content_link": content_url}
  80. )
  81. return response.json()