# test_shipinhao_client.py
"""V3-M1C: tests for the Shipinhao (视频号) client.

Covers search, result normalization, retry, and the blocked author-works path.
"""
  2. from __future__ import annotations
  3. import httpx
  4. import pytest
  5. from content_agent.errors import ContentAgentError, ErrorCode
  6. from content_agent.integrations.shipinhao import CrawapiShipinhaoClient
  7. class FakeHttpClient:
  8. def __init__(self, responses):
  9. self.responses = list(responses)
  10. self.requests = []
  11. def post(self, url, json, headers, timeout):
  12. self.requests.append({"url": url, "json": json})
  13. return self.responses.pop(0)
  14. def _response(status_code, data):
  15. return httpx.Response(
  16. status_code, json=data, request=httpx.Request("POST", "http://crawler.test/x")
  17. )
  18. def _query():
  19. return {
  20. "search_query_id": "q_001",
  21. "search_query": "彩虹",
  22. "discovery_start_source": "pattern_itemset",
  23. }
  24. def _client(responses):
  25. sleeps: list[float] = []
  26. client = CrawapiShipinhaoClient(
  27. base_url="http://crawler.test",
  28. http_client=FakeHttpClient(responses),
  29. sleep_fn=sleeps.append,
  30. )
  31. return client, sleeps
  32. _SUCCESS = {
  33. "code": 0,
  34. "data": {
  35. "has_more": True,
  36. "next_cursor": 12,
  37. "data": [
  38. {
  39. "channel_content_id": "finderobj_abc",
  40. "title": "圆形彩虹 #彩虹 #见者好运",
  41. "content_type": "video",
  42. "video_url_list": [{"video_url": "https://findermp.video.qq.com/x"}],
  43. "channel_account_id": "acc_123",
  44. "channel_account_name": "掌上巴彦淖尔",
  45. "like_count": 92,
  46. "publish_timestamp": 1780904037000,
  47. }
  48. ],
  49. },
  50. }
  51. _FAIL_25011 = {"code": 25011, "msg": "视频号接口异常: 获取搜索结果失败", "data": None}
  52. def test_shipinhao_search_maps_canonical_fields():
  53. client, _ = _client([_response(200, _SUCCESS)])
  54. result = client.search(_query())[0]
  55. assert result["platform"] == "shipinhao"
  56. assert result["platform_content_id"] == "finderobj_abc"
  57. assert result["platform_author_id"] == "acc_123"
  58. assert result["author_display_name"] == "掌上巴彦淖尔"
  59. assert result["tags"] == ["#彩虹", "#见者好运"]
  60. assert result["play_url"] == "https://findermp.video.qq.com/x"
  61. assert result["statistics"]["digg_count"] == 92
  62. assert result["create_time"] == 1780904037 # ms -> s
  63. assert result["has_more"] is True
  64. assert result["next_cursor"] == "12"
  65. def test_shipinhao_search_retries_on_25011_then_succeeds():
  66. client, sleeps = _client([_response(200, _FAIL_25011), _response(200, _SUCCESS)])
  67. result = client.search(_query())
  68. assert len(result) == 1
  69. assert sleeps == [1] # one backoff before the successful retry
  70. def test_shipinhao_search_does_not_retry_empty_result():
  71. empty = {"code": 0, "data": {"has_more": False, "next_cursor": "", "data": []}}
  72. client, sleeps = _client([_response(200, empty)])
  73. assert client.search(_query()) == []
  74. assert sleeps == []
  75. def test_shipinhao_search_raises_after_exhausted():
  76. client, sleeps = _client([_response(200, _FAIL_25011) for _ in range(3)])
  77. with pytest.raises(ContentAgentError) as exc:
  78. client.search(_query())
  79. assert exc.value.error_code == ErrorCode.PLATFORM_REQUEST_FAILED
  80. assert sleeps == [1, 2] # backoff before attempts 2 and 3
  81. def test_shipinhao_fetch_author_works_blocked_returns_empty():
  82. client, _ = _client([])
  83. assert client.fetch_author_works({"platform_author_id": "acc_123"}) == []