| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
"""V3-M1C: Shipinhao (WeChat Channels) client search/normalization/retry/blocked tests."""
- from __future__ import annotations
- import httpx
- import pytest
- from content_agent.errors import ContentAgentError, ErrorCode
- from content_agent.integrations.shipinhao import CrawapiShipinhaoClient
class FakeHttpClient:
    """Scripted stand-in for an HTTP client that replays canned responses.

    Each ``post`` call is recorded in ``requests`` and answered with the next
    queued response, in the order the responses were supplied.
    """

    def __init__(self, responses):
        # Copy so that ``post`` never mutates the caller's sequence.
        self.responses = list(responses)
        # One ``{"url": ..., "json": ...}`` entry per ``post`` call, in call order.
        self.requests = []

    def post(self, url, json, headers, timeout):
        """Record the call and return the next scripted response.

        ``headers`` and ``timeout`` are accepted but not recorded.

        Raises:
            IndexError: if called after every scripted response was consumed.
                The call is still recorded in ``requests`` before raising.
        """
        self.requests.append({"url": url, "json": json})
        if not self.responses:
            # Same exception type as popping an empty list, but the message
            # points at the unexpected extra request.
            raise IndexError(
                f"FakeHttpClient has no scripted response left for POST {url}"
            )
        return self.responses.pop(0)
def _response(status_code, data):
    """Build an ``httpx.Response`` with the given status and ``data`` as JSON body.

    The response is bound to a placeholder POST request against
    ``http://crawler.test/x``.
    """
    request = httpx.Request("POST", "http://crawler.test/x")
    return httpx.Response(status_code, json=data, request=request)
- def _query():
- return {
- "search_query_id": "q_001",
- "search_query": "彩虹",
- "discovery_start_source": "pattern_itemset",
- }
def _client(responses):
    """Build a client wired to scripted HTTP responses and a recording sleep.

    Args:
        responses: responses the fake HTTP client hands out, in order.

    Returns:
        A ``(client, sleeps)`` pair; ``sleeps`` collects every value the
        client passes to its ``sleep_fn``, so no real sleeping happens.
    """
    sleeps: list[float] = []
    http_client = FakeHttpClient(responses)
    client = CrawapiShipinhaoClient(
        base_url="http://crawler.test",
        http_client=http_client,
        sleep_fn=sleeps.append,
    )
    return client, sleeps
- _SUCCESS = {
- "code": 0,
- "data": {
- "has_more": True,
- "next_cursor": 12,
- "data": [
- {
- "channel_content_id": "finderobj_abc",
- "title": "圆形彩虹 #彩虹 #见者好运",
- "content_type": "video",
- "video_url_list": [{"video_url": "https://findermp.video.qq.com/x"}],
- "channel_account_id": "acc_123",
- "channel_account_name": "掌上巴彦淖尔",
- "like_count": 92,
- "publish_timestamp": 1780904037000,
- }
- ],
- },
- }
- _FAIL_25011 = {"code": 25011, "msg": "视频号接口异常: 获取搜索结果失败", "data": None}
def test_shipinhao_search_maps_canonical_fields():
    """A successful search maps the raw item onto the canonical result fields."""
    client, _ = _client([_response(200, _SUCCESS)])

    results = client.search(_query())
    # The fixture holds exactly one item; fail clearly instead of on ``[0]``.
    assert len(results) == 1
    result = results[0]

    assert result["platform"] == "shipinhao"
    assert result["platform_content_id"] == "finderobj_abc"
    assert result["platform_author_id"] == "acc_123"
    assert result["author_display_name"] == "掌上巴彦淖尔"
    assert result["tags"] == ["#彩虹", "#见者好运"]
    assert result["play_url"] == "https://findermp.video.qq.com/x"
    assert result["statistics"]["digg_count"] == 92
    assert result["create_time"] == 1780904037  # ms -> s
    assert result["has_more"] is True
    assert result["next_cursor"] == "12"
def test_shipinhao_search_retries_on_25011_then_succeeds():
    """A 25011 response followed by a success yields the result after one backoff."""
    scripted = [_response(200, _FAIL_25011), _response(200, _SUCCESS)]
    client, sleeps = _client(scripted)

    result = client.search(_query())

    assert len(result) == 1
    assert sleeps == [1]  # one backoff before the successful retry
def test_shipinhao_search_does_not_retry_empty_result():
    """A code-0 response with no items returns ``[]`` without any backoff."""
    empty = {"code": 0, "data": {"has_more": False, "next_cursor": "", "data": []}}
    client, sleeps = _client([_response(200, empty)])

    assert client.search(_query()) == []
    assert sleeps == []
def test_shipinhao_search_raises_after_exhausted():
    """Three consecutive 25011 responses end in ``PLATFORM_REQUEST_FAILED``."""
    client, sleeps = _client([_response(200, _FAIL_25011) for _ in range(3)])

    with pytest.raises(ContentAgentError) as exc:
        client.search(_query())

    # These checks sit outside the ``with`` block: statements placed after
    # the raising call inside it would never run.
    assert exc.value.error_code == ErrorCode.PLATFORM_REQUEST_FAILED
    assert sleeps == [1, 2]  # backoff before attempts 2 and 3
def test_shipinhao_fetch_author_works_blocked_returns_empty():
    """``fetch_author_works`` returns ``[]`` when no HTTP responses are scripted.

    NOTE(review): presumably the client short-circuits without issuing a
    request (the fake has nothing to return) — confirm against the client.
    """
    client, _ = _client([])

    assert client.fetch_author_works({"platform_author_id": "acc_123"}) == []
|