from __future__ import annotations import re import json import requests from fake_useragent import FakeUserAgent from tenacity import retry from applications import log from applications.utils import request_retry retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30) # url from aigc base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin" headers = {"Content-Type": "application/json"} @retry(**retry_desc) def get_article_detail( article_link: str, is_count: bool=False, is_cache: bool=True ) -> dict | None: """ get official article detail """ target_url = f"{base_url}/detail" payload = json.dumps( { "content_link": article_link, "is_count": is_count, "is_ad": False, "is_cache": is_cache } ) try: response = requests.post( url=target_url, headers=headers, data=payload, timeout=120 ) response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: log( task="get_official_article_detail", function="get_official_article_detail", message=f"API请求失败: {e}", data={"link": article_link} ) except json.JSONDecodeError as e: log( task="get_official_article_detail", function="get_official_article_detail", message=f"响应解析失败: {e}", data={"link": article_link} ) return None @retry(**retry_desc) def get_article_list_from_account( account_id: str, index=None ) -> dict | None: target_url = f"{base_url}/blogger" payload = json.dumps( { "account_id": account_id, "cursor": index } ) try: response = requests.post( url=target_url, headers=headers, data=payload, timeout=120 ) response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: log( task="get_official_account_article_list", function="get_official_account_article_list", message=f"API请求失败: {e}", data={"gh_id": account_id} ) except json.JSONDecodeError as e: log( task="get_official_account_article_list", function="get_official_account_article_list", message=f"响应解析失败: {e}", data={"gh_id": account_id} ) return None @retry(**retry_desc) 
def get_source_account_from_article(article_link) -> dict | None:
    """
    Get the source account info embedded in an official article page.

    The page is fetched with a random User-Agent and its HTML is searched for
    the ``hit_nickname: '...'`` and ``hit_username: '...'`` assignments.

    :param article_link: URL of the article page to fetch.
    :return: ``{'name': ..., 'gh_id': ...}`` when both values are found, an
        empty dict when either is missing, or ``None`` when a handled error
        occurred (the error is logged).
    """
    task_name = "get_source_account_from_article"
    log_payload = {"link": article_link}
    nickname_pattern = r"hit_nickname:\s*'([^']+)'"
    username_pattern = r"hit_username:\s*'([^']+)'"

    try:
        user_agent = FakeUserAgent().random
        page = requests.get(
            url=article_link,
            headers={'User-Agent': user_agent},
            timeout=120,
        )
        page.raise_for_status()
        page_html = page.text

        nickname_match = re.search(nickname_pattern, page_html)
        username_match = re.search(username_pattern, page_html)

        # Both values are required; a partial match yields an empty result.
        if not (nickname_match and username_match):
            return {}

        return {
            'name': nickname_match.group(1),
            'gh_id': username_match.group(1),
        }
    except requests.exceptions.RequestException as err:
        log(
            task=task_name,
            function=task_name,
            message=f"API请求失败: {err}",
            data=log_payload,
        )
    except json.JSONDecodeError as err:
        log(
            task=task_name,
            function=task_name,
            message=f"响应解析失败: {err}",
            data=log_payload,
        )
    return None