official_accounts_api.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. from __future__ import annotations
  2. import re
  3. import json
  4. import requests
  5. from fake_useragent import FakeUserAgent
  6. from tenacity import retry
  7. from applications import log
  8. from applications.utils import request_retry
  # Keyword arguments unpacked into tenacity's ``retry`` decorator on every
  # API function in this module.
  # NOTE(review): the exact retry policy depends on request_retry() — confirm there.
  retry_desc = request_retry(
      retry_times=3,
      min_retry_delay=2,
      max_retry_delay=30,
  )

  # Base URL of the crawler service (original note: URL provided by AIGC).
  base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"

  # Headers sent with every POST to the crawler service; payloads are JSON.
  headers = {"Content-Type": "application/json"}
  @retry(**retry_desc)
  def get_article_detail(
      article_link: str, is_count: bool = False, is_cache: bool = True
  ) -> dict | None:
      """
      Fetch the detail of an official-account article from the crawler service.

      POSTs a JSON payload to ``{base_url}/detail`` and returns the decoded
      JSON body of the response.

      NOTE(review): request and decoding failures are logged and turned into
      ``None`` here, so the ``retry`` decorator never sees those exceptions.
      Confirm whether ``retry_desc`` is meant to retry on a ``None`` result.

      :param article_link: article URL, sent as ``content_link``.
      :param is_count: forwarded unchanged as the ``is_count`` flag.
      :param is_cache: forwarded unchanged as the ``is_cache`` flag.
      :return: the decoded JSON body, or ``None`` when the HTTP request fails
          or the body is not valid JSON (both cases are logged).
      """
      target_url = f"{base_url}/detail"
      payload = json.dumps(
          {
              "content_link": article_link,
              "is_count": is_count,
              "is_ad": False,
              "is_cache": is_cache,
          }
      )

      # Transport and HTTP-status failures.
      try:
          response = requests.post(
              url=target_url, headers=headers, data=payload, timeout=120
          )
          response.raise_for_status()
      except requests.exceptions.RequestException as e:
          log(
              task="get_official_article_detail",
              function="get_official_article_detail",
              message=f"API请求失败: {e}",
              data={"link": article_link},
          )
          return None

      # Decoding is handled separately: recent requests versions raise a
      # RequestException subclass from .json(), which the handler above would
      # otherwise report as a request failure. Every JSON decode error
      # (json, simplejson, requests) derives from ValueError.
      try:
          return response.json()
      except ValueError as e:
          log(
              task="get_official_article_detail",
              function="get_official_article_detail",
              message=f"响应解析失败: {e}",
              data={"link": article_link},
          )
          return None
  @retry(**retry_desc)
  def get_article_list_from_account(
      account_id: str, index=None
  ) -> dict | None:
      """
      Fetch one page of an official account's article list from the crawler service.

      POSTs a JSON payload to ``{base_url}/blogger`` and returns the decoded
      JSON body of the response.

      NOTE(review): request and decoding failures are logged and turned into
      ``None`` here, so the ``retry`` decorator never sees those exceptions.
      Confirm whether ``retry_desc`` is meant to retry on a ``None`` result.

      :param account_id: account identifier, sent as ``account_id`` (logged as
          ``gh_id`` on failure).
      :param index: value sent as ``cursor``; ``None`` is serialized as JSON
          ``null``. Presumably a pagination cursor from a previous response —
          confirm against the service.
      :return: the decoded JSON body, or ``None`` when the HTTP request fails
          or the body is not valid JSON (both cases are logged).
      """
      target_url = f"{base_url}/blogger"
      payload = json.dumps(
          {
              "account_id": account_id,
              "cursor": index,
          }
      )

      # Transport and HTTP-status failures.
      try:
          response = requests.post(
              url=target_url, headers=headers, data=payload, timeout=120
          )
          response.raise_for_status()
      except requests.exceptions.RequestException as e:
          log(
              task="get_official_account_article_list",
              function="get_official_account_article_list",
              message=f"API请求失败: {e}",
              data={"gh_id": account_id},
          )
          return None

      # Decoding is handled separately: recent requests versions raise a
      # RequestException subclass from .json(), which the handler above would
      # otherwise report as a request failure. Every JSON decode error
      # (json, simplejson, requests) derives from ValueError.
      try:
          return response.json()
      except ValueError as e:
          log(
              task="get_official_account_article_list",
              function="get_official_account_article_list",
              message=f"响应解析失败: {e}",
              data={"gh_id": account_id},
          )
          return None
  @retry(**retry_desc)
  def get_source_account_from_article(article_link) -> dict | None:
      """
      Extract the publishing account's info from an official article page.

      Downloads the article HTML with a random User-Agent and searches it for
      the ``hit_nickname: '...'`` and ``hit_username: '...'`` assignments.

      NOTE(review): request failures are logged and turned into ``None`` here,
      so the ``retry`` decorator never sees those exceptions. Confirm whether
      ``retry_desc`` is meant to retry on a ``None`` result.

      :param article_link: URL of the article page to download.
      :return: ``{'name': <nickname>, 'gh_id': <username>}`` when both fields
          are found, ``{}`` when either one is missing from the page, or
          ``None`` when the HTTP request fails (the failure is logged).
      """
      try:
          response = requests.get(
              url=article_link,
              headers={'User-Agent': FakeUserAgent().random},
              timeout=120,
          )
          response.raise_for_status()
      except requests.exceptions.RequestException as e:
          # No JSON is parsed in this function, so this is the only failure
          # mode that needs handling.
          log(
              task="get_source_account_from_article",
              function="get_source_account_from_article",
              message=f"API请求失败: {e}",
              data={"link": article_link},
          )
          return None

      html_text = response.text
      nickname = re.search(r"hit_nickname:\s*'([^']+)'", html_text)
      username = re.search(r"hit_username:\s*'([^']+)'", html_text)

      # Both fields are required; a partial match is treated as "not found".
      if not (nickname and username):
          return {}

      # Return the extracted result.
      return {
          'name': nickname.group(1),
          'gh_id': username.group(1),
      }