from __future__ import annotations

import json
import re

import requests
from fake_useragent import FakeUserAgent
from tenacity import retry

from applications.api import log
from applications.utils import request_retry

# Keyword arguments passed to tenacity's ``retry`` decorator below.
retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30)

# url from aigc
base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"
headers = {"Content-Type": "application/json"}


def _post_json(endpoint: str, body: dict, task: str, log_data: dict) -> dict | None:
    """
    POST ``body`` as JSON to ``{base_url}/{endpoint}`` and return the decoded reply.

    :param endpoint: path segment appended to ``base_url``.
    :param body: request body; serialised with ``json.dumps``.
    :param task: value logged as both ``task`` and ``function`` on failure.
    :param log_data: extra data attached to the failure log entry.
    :return: the decoded JSON response, or ``None`` when the request or the
        response parsing fails (the failure is logged, not raised).
    """
    payload = json.dumps(body)
    try:
        response = requests.post(
            url=f"{base_url}/{endpoint}", headers=headers, data=payload, timeout=120
        )
        response.raise_for_status()
        return response.json()
    # The decode handler must come first: in recent versions of requests the
    # error raised by ``response.json()`` is also a ``RequestException``, so the
    # broader handler below would otherwise report it as a request failure.
    except json.JSONDecodeError as e:
        log(
            task=task,
            function=task,
            message=f"响应解析失败: {e}",
            data=log_data,
        )
    except requests.exceptions.RequestException as e:
        # NOTE(review): request errors are swallowed here, so the callers'
        # ``@retry`` never sees them -- confirm whether retry_desc is meant to
        # retry on request failures.
        log(
            task=task,
            function=task,
            message=f"API请求失败: {e}",
            data=log_data,
        )
    return None


@retry(**retry_desc)
def get_article_detail(
    article_link: str, is_count: bool = False, is_cache: bool = True
) -> dict | None:
    """
    get official article detail

    :param article_link: article URL, sent to the service as ``content_link``.
    :param is_count: forwarded to the service as ``is_count``.
    :param is_cache: forwarded to the service as ``is_cache``.
    :return: the decoded JSON response of the ``/detail`` endpoint, or ``None``
        when the request or the response parsing fails.
    """
    return _post_json(
        "detail",
        {
            "content_link": article_link,
            "is_count": is_count,
            "is_ad": False,
            "is_cache": is_cache,
        },
        task="get_official_article_detail",
        log_data={"link": article_link},
    )


@retry(**retry_desc)
def get_article_list_from_account(account_id: str, index=None) -> dict | None:
    """
    get the article list of an official account

    :param account_id: account identifier, sent to the service as ``account_id``.
    :param index: value sent to the service as ``cursor``; ``None`` is sent as
        JSON ``null`` (presumably requesting the first page -- confirm).
    :return: the decoded JSON response of the ``/blogger`` endpoint, or ``None``
        when the request or the response parsing fails.
    """
    return _post_json(
        "blogger",
        {"account_id": account_id, "cursor": index},
        task="get_official_account_article_list",
        log_data={"gh_id": account_id},
    )
@retry(**retry_desc)
def get_source_account_from_article(article_link) -> dict | None:
    """
    Download an official article page and extract its source account.

    :param article_link: URL of the article page to fetch.
    :return: ``{"name": ..., "gh_id": ...}`` when both ``hit_nickname`` and
        ``hit_username`` are found in the page, ``{}`` when either one is
        missing, or ``None`` when a handled exception was logged.
    """
    task_name = "get_source_account_from_article"
    try:
        page = requests.get(
            url=article_link,
            headers={"User-Agent": FakeUserAgent().random},
            timeout=120,
        )
        page.raise_for_status()
        body = page.text

        # Each pattern captures the single-quoted value that follows the key.
        nickname_match = re.search(r"hit_nickname:\s*'([^']+)'", body)
        username_match = re.search(r"hit_username:\s*'([^']+)'", body)

        # Output the extracted result.
        if nickname_match is None or username_match is None:
            return {}
        return {
            "name": nickname_match.group(1),
            "gh_id": username_match.group(1),
        }
    except requests.exceptions.RequestException as e:
        log(
            task=task_name,
            function=task_name,
            message=f"API请求失败: {e}",
            data={"link": article_link},
        )
    except json.JSONDecodeError as e:
        log(
            task=task_name,
            function=task_name,
            message=f"响应解析失败: {e}",
            data={"link": article_link},
        )
    return None