gzh_spider.py

from __future__ import annotations

import re
import json

import requests
from fake_useragent import FakeUserAgent
from tenacity import retry

from applications.api import log
from applications.utils import request_retry
from applications.utils import AsyncHttpClient

retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30)
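
# `request_retry` is assumed to build the keyword-argument dict for
# `tenacity.retry`. Judging by its parameter names alone, a rough equivalent
# would be the sketch below; the real helper lives in applications.utils and
# may differ:
#
#     from tenacity import stop_after_attempt, wait_exponential
#     retry_desc = {
#         "stop": stop_after_attempt(3),
#         "wait": wait_exponential(min=2, max=30),
#     }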

# url from aigc
base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"
headers = {"Content-Type": "application/json"}


@retry(**retry_desc)
async def get_article_detail(
    article_link: str, is_count: bool = False, is_cache: bool = True
) -> dict | None:
    """
    get the detail of an official account article
    """
    target_url = f"{base_url}/detail"
    payload = json.dumps(
        {
            "content_link": article_link,
            "is_count": is_count,
            "is_ad": False,
            "is_cache": is_cache,
        }
    )
    async with AsyncHttpClient(timeout=10) as http_client:
        response = await http_client.post(target_url, headers=headers, data=payload)
        return response
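
# Usage sketch (hypothetical article link; needs an event loop and network
# access to the crawler service):
#
#     import asyncio
#     detail = asyncio.run(get_article_detail("https://mp.weixin.qq.com/s/..."))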


@retry(**retry_desc)
async def get_article_list_from_account(account_id: str, index=None) -> dict | None:
    """
    get the article list of an official account, one cursor-paged batch per call
    """
    target_url = f"{base_url}/blogger"
    payload = json.dumps({"account_id": account_id, "cursor": index})
    async with AsyncHttpClient(timeout=120) as http_client:
        response = await http_client.post(target_url, headers=headers, data=payload)
        return response
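
# Pagination sketch: feed the cursor from the previous page back in as `index`.
# The exact response shape depends on the crawler service; the `next_cursor`
# field name below is an assumption:
#
#     async def iterate_account(account_id: str):
#         cursor = None
#         while True:
#             page = await get_article_list_from_account(account_id, index=cursor)
#             if not page:
#                 break
#             yield page
#             cursor = page.get("next_cursor")  # hypothetical field name
#             if not cursor:
#                 break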


@retry(**retry_desc)
def get_source_account_from_article(article_link: str) -> dict | None:
    """
    get account info from an official account article
    :param article_link:
    :return:
    """
    try:
        response = requests.get(
            url=article_link,
            headers={"User-Agent": FakeUserAgent().random},
            timeout=120,
        )
        response.raise_for_status()
        html_text = response.text
        regex_nickname = r"hit_nickname:\s*'([^']+)'"
        regex_username = r"hit_username:\s*'([^']+)'"
        nickname = re.search(regex_nickname, html_text)
        username = re.search(regex_username, html_text)
        # return the extracted account fields
        if nickname and username:
            return {"name": nickname.group(1), "gh_id": username.group(1)}
        else:
            return {}
    except requests.exceptions.RequestException as e:
        log(
            task="get_source_account_from_article",
            function="get_source_account_from_article",
            message=f"API request failed: {e}",
            data={"link": article_link},
        )
    except json.JSONDecodeError as e:
        log(
            task="get_source_account_from_article",
            function="get_source_account_from_article",
            message=f"Response parsing failed: {e}",
            data={"link": article_link},
        )
    return None
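
# Usage sketch (synchronous; hypothetical link). The function returns {} when
# the page has no account markers and None after a logged request failure, so
# a truthiness check covers both cases:
#
#     account = get_source_account_from_article("https://mp.weixin.qq.com/s/...")
#     if account:
#         print(account["name"], account["gh_id"])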


@retry(**retry_desc)
async def weixin_search(keyword: str, page="1") -> dict | None:
    """
    search official account articles by keyword, paged by cursor
    """
    url = f"{base_url}/keyword"
    payload = json.dumps({"keyword": keyword, "cursor": page})
    async with AsyncHttpClient(timeout=120) as http_client:
        response = await http_client.post(url=url, headers=headers, data=payload)
        return response
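

if __name__ == "__main__":
    # Minimal smoke test, assuming the crawler service behind `base_url` is
    # reachable; the keyword is arbitrary.
    import asyncio

    async def _demo() -> None:
        result = await weixin_search("example")
        print(result)

    asyncio.run(_demo())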