123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129 |
- from __future__ import annotations
- import re
- import json
- import requests
- from fake_useragent import FakeUserAgent
- from tenacity import retry
- from applications import log
- from applications.utils import request_retry
# Crawler service endpoint shared by the POST helpers below (url from aigc).
base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"

# Request headers for the crawler API: payloads are sent as JSON text.
headers = {"Content-Type": "application/json"}

# Keyword arguments unpacked into tenacity's ``retry`` decorator on every
# request helper in this module.
retry_desc = request_retry(
    retry_times=3,
    min_retry_delay=2,
    max_retry_delay=30,
)
@retry(**retry_desc)
def get_article_detail(
    article_link: str, is_count: bool = False, is_cache: bool = True
) -> dict | None:
    """Fetch the detail of an official article from the crawler service.

    POSTs a JSON payload to ``{base_url}/detail`` and returns the decoded
    JSON body of the response.

    :param article_link: article URL, sent as ``content_link``.
    :param is_count: forwarded unchanged as ``is_count``.
    :param is_cache: forwarded unchanged as ``is_cache``.
    :return: the decoded JSON response, or ``None`` when the request fails
        or the body is not valid JSON (the failure is logged).

    NOTE(review): both handled failure paths return ``None`` instead of
    raising, so whether ``@retry`` re-attempts them depends on how
    ``request_retry`` is configured -- confirm.
    """
    target_url = f"{base_url}/detail"
    payload = json.dumps(
        {
            "content_link": article_link,
            "is_count": is_count,
            "is_ad": False,
            "is_cache": is_cache,
        }
    )
    try:
        response = requests.post(
            url=target_url, headers=headers, data=payload, timeout=120
        )
        response.raise_for_status()
        return response.json()
    # The decode handler must come first: on requests >= 2.27 the error raised
    # by ``response.json()`` is also a ``RequestException``, so the broader
    # handler would otherwise log a malformed body as a request failure.
    except json.JSONDecodeError as e:
        log(
            task="get_official_article_detail",
            function="get_official_article_detail",
            message=f"响应解析失败: {e}",
            data={"link": article_link},
        )
    except requests.exceptions.RequestException as e:
        log(
            task="get_official_article_detail",
            function="get_official_article_detail",
            message=f"API请求失败: {e}",
            data={"link": article_link},
        )
    return None
@retry(**retry_desc)
def get_article_list_from_account(
    account_id: str, index=None
) -> dict | None:
    """Fetch one page of an account's article list from the crawler service.

    POSTs a JSON payload to ``{base_url}/blogger`` and returns the decoded
    JSON body of the response.

    :param account_id: account identifier, sent as ``account_id`` (logged
        under the ``gh_id`` key on failure).
    :param index: sent as ``cursor``; ``None`` is serialized as JSON ``null``.
        Presumably a pagination cursor returned by a previous call --
        verify against callers.
    :return: the decoded JSON response, or ``None`` when the request fails
        or the body is not valid JSON (the failure is logged).

    NOTE(review): both handled failure paths return ``None`` instead of
    raising, so whether ``@retry`` re-attempts them depends on how
    ``request_retry`` is configured -- confirm.
    """
    target_url = f"{base_url}/blogger"
    payload = json.dumps(
        {
            "account_id": account_id,
            "cursor": index,
        }
    )
    try:
        response = requests.post(
            url=target_url, headers=headers, data=payload, timeout=120
        )
        response.raise_for_status()
        return response.json()
    # The decode handler must come first: on requests >= 2.27 the error raised
    # by ``response.json()`` is also a ``RequestException``, so the broader
    # handler would otherwise log a malformed body as a request failure.
    except json.JSONDecodeError as e:
        log(
            task="get_official_account_article_list",
            function="get_official_account_article_list",
            message=f"响应解析失败: {e}",
            data={"gh_id": account_id},
        )
    except requests.exceptions.RequestException as e:
        log(
            task="get_official_account_article_list",
            function="get_official_account_article_list",
            message=f"API请求失败: {e}",
            data={"gh_id": account_id},
        )
    return None
@retry(**retry_desc)
def get_source_account_from_article(article_link) -> dict | None:
    """Extract the publishing account's info from an official article page.

    Downloads the article HTML with a random User-Agent and looks for the
    ``hit_nickname: '...'`` and ``hit_username: '...'`` fragments in it.

    :param article_link: URL of the article page to download.
    :return: ``{"name": ..., "gh_id": ...}`` when both fragments are found,
        an empty dict when either is missing, or ``None`` when the request
        fails (the failure is logged).
    """
    try:
        page = requests.get(
            url=article_link,
            headers={"User-Agent": FakeUserAgent().random},
            timeout=120,
        )
        page.raise_for_status()
        body = page.text

        nickname_match = re.search(r"hit_nickname:\s*'([^']+)'", body)
        username_match = re.search(r"hit_username:\s*'([^']+)'", body)

        # Both fields are required; a partial hit counts as "not found".
        if not (nickname_match and username_match):
            return {}
        return {
            "name": nickname_match.group(1),
            "gh_id": username_match.group(1),
        }
    except requests.exceptions.RequestException as err:
        log(
            task="get_source_account_from_article",
            function="get_source_account_from_article",
            message=f"API请求失败: {err}",
            data={"link": article_link},
        )
    # NOTE(review): nothing visible in this block parses JSON; the handler is
    # kept to preserve existing behavior -- confirm whether it can be dropped.
    except json.JSONDecodeError as err:
        log(
            task="get_source_account_from_article",
            function="get_source_account_from_article",
            message=f"响应解析失败: {err}",
            data={"link": article_link},
        )
    return None
|