# gzh_api.py (2.7 KB)

  1. import json
  2. import requests
  3. from tenacity import retry
  4. from requests.exceptions import RequestException
  5. from typing import Optional, Dict
  6. from applications import log
  7. from applications.utils import request_retry
  # Base URL shared by the crawler endpoints built with f"{base_url}/..." below.
  base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"

  # Headers attached to every POST issued through send_post_request.
  headers = {"Content-Type": "application/json"}

  # Keyword arguments unpacked into tenacity's @retry on the API wrappers.
  retry_desc = request_retry(
      retry_times=3,
      min_retry_delay=2,
      max_retry_delay=60,
  )
  def send_post_request(url, data):
      """POST ``data`` to ``url`` and return the decoded JSON response.

      The request is sent with the module-level ``headers`` and a 60-second
      timeout. Failures are reported on stdout and swallowed, so callers
      receive ``None`` instead of an exception.

      Args:
          url: Endpoint to post to.
          data: Request body, passed unchanged to ``requests.post(data=...)``.

      Returns:
          The parsed JSON body, or ``None`` if the request failed, the server
          answered with an HTTP error status, or the body was not valid JSON.
      """
      # NOTE(review): exceptions never leave this function, so the
      # @retry-decorated callers can only retry if retry_desc is configured to
      # retry on a ``None`` result -- confirm against request_retry.

      # Transport/status failures are handled separately from parsing: recent
      # requests releases raise a JSON decode error that is itself a
      # RequestException, so a single try block reports a malformed body as a
      # request failure and never reaches the parse handler.
      try:
          response = requests.post(url, headers=headers, data=data, timeout=60)
          response.raise_for_status()
      except RequestException as e:
          print(f"API请求失败: {e}")
          return None

      try:
          return response.json()
      except ValueError as e:
          # ValueError is the common base of the decode errors raised by
          # json, simplejson and requests.
          print(f"响应解析失败: {e}")
          return None
  @retry(**retry_desc)
  def search_articles_in_gzh(title: str, page: str = "1") -> Optional[Dict]:
      """Post a keyword search to the ``/keyword`` endpoint.

      Args:
          title: Value sent as the ``keyword`` field.
          page: Value sent as the ``cursor`` field; defaults to ``"1"``.

      Returns:
          Whatever ``send_post_request`` returns: the decoded JSON response,
          or ``None`` when the request or parsing failed.
      """
      body = {"keyword": title, "cursor": page}
      return send_post_request(f"{base_url}/keyword", data=json.dumps(body))
  @retry(**retry_desc)
  def get_gzh_article_detail(content_link: str, is_count: bool = False, is_cache: bool = True) -> Optional[Dict]:
      """Post an article link to the ``/detail`` endpoint.

      Args:
          content_link: Value sent as the ``content_link`` field.
          is_count: Value sent as the ``is_count`` field.
          is_cache: Value sent as the ``is_cache`` field.

      Returns:
          Whatever ``send_post_request`` returns: the decoded JSON response,
          or ``None`` when the request or parsing failed.
      """
      body = {
          "content_link": content_link,
          "is_count": is_count,
          "is_cache": is_cache,
          # Always sent as False; not exposed as a parameter.
          "is_ad": False,
      }
      return send_post_request(f"{base_url}/detail", data=json.dumps(body))
  @retry(**retry_desc)
  def get_gzh_account_article_list(gh_id: str, index: Optional[str] = None) -> Optional[Dict]:
      """Post an account id to the ``/blogger`` endpoint.

      Args:
          gh_id: Value sent as the ``account_id`` field.
          index: Value sent as the ``cursor`` field. When left as ``None`` it
              is serialized as JSON ``null``.

      Returns:
          Whatever ``send_post_request`` returns: the decoded JSON response,
          or ``None`` when the request or parsing failed.
      """
      url = f"{base_url}/blogger"
      payload = json.dumps(
          {
              "account_id": gh_id,
              "cursor": index,
          }
      )
      return send_post_request(url, data=payload)
  @retry(**retry_desc)
  def get_gzh_account_detail(content_link: str) -> Optional[Dict]:
      """Post an article link to the ``/account_info`` endpoint.

      Args:
          content_link: Value sent as the ``content_link`` field.

      Returns:
          Whatever ``send_post_request`` returns: the decoded JSON response,
          or ``None`` when the request or parsing failed.
      """
      body = json.dumps({"content_link": content_link})
      return send_post_request(f"{base_url}/account_info", data=body)
  @retry(**retry_desc)
  def get_gzh_recommend_articles(content_link: str) -> Optional[Dict]:
      """Post an article link to the ``/recommend`` endpoint.

      Args:
          content_link: Value sent as the ``content_link`` field.

      Returns:
          Whatever ``send_post_request`` returns: the decoded JSON response,
          or ``None`` when the request or parsing failed.
      """
      body = json.dumps({"content_link": content_link})
      return send_post_request(f"{base_url}/recommend", data=body)
  @retry(**retry_desc)
  def get_gzh_recommend_articles_v2(content_link: str) -> Optional[Dict]:
      """Post an article link to the datapi.top related-article endpoint and log the call.

      Args:
          content_link: Article link, sent as the ``url`` field.

      Returns:
          Whatever ``send_post_request`` returns: the decoded JSON response,
          or ``None`` when the request or parsing failed.
      """
      # NOTE(review): unlike the other wrappers, the payload is passed as a
      # dict rather than a JSON string, so requests form-encodes it while
      # send_post_request still sends a JSON Content-Type header -- confirm
      # the endpoint expects this.
      # NOTE(review): the API token is hard-coded here; consider moving it to
      # configuration.
      form_fields = dict(url=content_link, token="401e4d3c85068bb5")
      result = send_post_request("http://datapi.top/wxapi/relatedarticle", data=form_fields)

      # Record both the input link and the raw result of the call.
      log(
          task="article_association_crawler",
          function="get_recommend_articles_v2",
          message="获取推荐链接,付费接口",
          data={"content_link": content_link, "response": result},
      )
      return result