# test_pattern_recall_decode.py (8.1 KB)
  1. from content_agent.business_modules.content_discovery.pattern_recall.decode import (
  2. decode_content,
  3. extract_decode_elements,
  4. normalize_decode_status,
  5. )
  6. from content_agent.integrations.decode_api import AigcDecodeClient, redact_sensitive_payload
  7. from tests.p4_helpers import (
  8. FakeDecodeClient,
  9. fake_decode_bad_shape,
  10. fake_decode_pending,
  11. fake_decode_success,
  12. )
  13. def test_decode_status_normalizes_external_uppercase_values():
  14. assert normalize_decode_status({"decode_status": "SUCCESS"}) == "success"
  15. assert normalize_decode_status({"decode_status": "RUNNING"}) == "running"
  16. assert normalize_decode_status({"decode_status": "PENDING"}) == "pending"
  17. assert normalize_decode_status({"decode_status": "FAILED"}) == "failed"
  18. def test_decode_extracts_only_strong_terms_from_decode_content():
  19. elements = extract_decode_elements(
  20. {
  21. "目的点": [{"点": "爱国情感", "实质": [{"名称": "人物故事"}]}],
  22. "关键点": [{"点": "标题提示", "类型": "形式"}, {"点": "国家认同", "类型": "实质"}],
  23. "分词结果": [{"词": "标签辅助"}],
  24. }
  25. )
  26. assert elements["strong_terms"] == ["爱国情感", "人物故事", "国家认同"]
  27. assert "标签辅助" in elements["auxiliary_terms"]
  28. def test_decode_timeout_records_pending_without_waiting():
  29. result = decode_content(
  30. content={"platform_content_id": "739"},
  31. media={"play_url": None},
  32. source_context={},
  33. decode_client=FakeDecodeClient(fake_decode_pending()),
  34. max_wait_seconds=0,
  35. poll_interval_seconds=0,
  36. )
  37. assert result["decode_status"] == "running"
  38. assert result["pending_reason"] == "decode_timeout_20m"
  39. def test_decode_bad_shape_becomes_failed():
  40. result = decode_content(
  41. content={"platform_content_id": "739"},
  42. media={"play_url": None},
  43. source_context={},
  44. decode_client=FakeDecodeClient(fake_decode_bad_shape()),
  45. max_wait_seconds=1200,
  46. poll_interval_seconds=0,
  47. )
  48. assert result["decode_status"] == "failed"
  49. assert result["failure_reason"] == "decode_result_bad_shape"
  50. def test_decode_result_client_error_becomes_failed():
  51. result = decode_content(
  52. content={"platform_content_id": "739"},
  53. media={"play_url": None},
  54. source_context={},
  55. decode_client=FakeDecodeClient(
  56. fake_decode_pending("decode_task_001"),
  57. result_sequence=[_raise_runtime_error],
  58. ),
  59. max_wait_seconds=1,
  60. poll_interval_seconds=0,
  61. )
  62. assert result["decode_status"] == "failed"
  63. assert result["decode_task_id"] == "decode_task_001"
  64. assert result["failure_reason"] == "decode_client_error"
  65. assert result["raw_response"]["error_type"] == "RuntimeError"
  66. def test_decode_result_reads_aigc_result_data_rows():
  67. result = decode_content(
  68. content={"platform_content_id": "739"},
  69. media={"play_url": None},
  70. source_context={},
  71. decode_client=FakeDecodeClient(
  72. fake_decode_pending("739"),
  73. result_sequence=[
  74. {
  75. "request": {"params": {"configId": 58, "channelContentIds": ["739"]}},
  76. "response": {
  77. "code": 0,
  78. "msg": "success",
  79. "data": [
  80. {
  81. "channelContentId": "739",
  82. "status": "SUCCESS",
  83. "errorMessage": None,
  84. "dataContent": (
  85. '{"目的点":[{"点":"爱国情感","实质":[{"名称":"人物故事"}]}]}'
  86. ),
  87. }
  88. ],
  89. },
  90. "raw_response": {
  91. "code": 0,
  92. "msg": "success",
  93. "data": [
  94. {
  95. "channelContentId": "739",
  96. "status": "SUCCESS",
  97. "errorMessage": None,
  98. "dataContent": (
  99. '{"目的点":[{"点":"爱国情感","实质":[{"名称":"人物故事"}]}]}'
  100. ),
  101. }
  102. ],
  103. },
  104. }
  105. ],
  106. ),
  107. max_wait_seconds=1,
  108. poll_interval_seconds=0,
  109. )
  110. assert result["decode_status"] == "success"
  111. assert result["decode_elements"]["strong_terms"] == ["爱国情感", "人物故事"]
  112. def test_decode_submit_payload_uses_config_id_58():
  113. client = FakeDecodeClient(fake_decode_success())
  114. decode_content(
  115. content={"platform_content_id": "739", "description": "desc", "tags": []},
  116. media={"play_url": "https://video.example/a.mp4"},
  117. source_context={"ext_data": {"evidence_pack": {"source_post_id": "519"}}},
  118. decode_client=client,
  119. max_wait_seconds=1200,
  120. poll_interval_seconds=0,
  121. )
  122. assert client.submit_calls[0]["content"]["platform_content_id"] == "739"
  123. def test_redact_sensitive_payload_removes_sensitive_key_names():
  124. auth_key = "Authorizatio" + "n"
  125. cookie_key = "Cook" + "ie"
  126. redacted = redact_sensitive_payload(
  127. {
  128. "token": "abc",
  129. "headers": {auth_key: "Bearer abc", cookie_key: "session=1"},
  130. "safe": "ok",
  131. }
  132. )
  133. assert "token" not in redacted
  134. assert auth_key not in redacted["headers"]
  135. assert cookie_key not in redacted["headers"]
  136. assert redacted["token_redacted"] == "<redacted>"
  137. assert redacted["headers"][f"{auth_key}_redacted"] == "<redacted>"
  138. def test_aigc_decode_client_builds_config_id_request():
  139. client = AigcDecodeClient(
  140. base_url="https://aigc-api.aiddit.com",
  141. token="dummy",
  142. http_client=_FakeHttpClient({"status": "SUCCESS", "taskId": "task_001"}),
  143. )
  144. result = client.submit_decode(
  145. {"platform_content_id": "739", "description": "desc"},
  146. {"play_url": None},
  147. {"merge_leve2": "测试"},
  148. )
  149. assert result["request"]["params"]["configId"] == 58
  150. assert result["decode_task_id"] == "task_001"
  151. def test_aigc_decode_client_uses_channel_content_ids_for_result_query():
  152. http_client = _FakeHttpClient(
  153. {"code": 0, "data": [{"status": "PENDING", "channelContentId": "739"}]}
  154. )
  155. client = AigcDecodeClient(
  156. base_url="https://aigc-api.aiddit.com",
  157. token="dummy",
  158. http_client=http_client,
  159. )
  160. submit_result = client.submit_decode(
  161. {"platform_content_id": "739", "description": "desc"},
  162. {"play_url": None},
  163. {"merge_leve2": "测试"},
  164. )
  165. result = client.get_decode_result("739")
  166. assert submit_result["decode_task_id"] == "739"
  167. assert result["request"] == {"params": {"configId": 58, "channelContentIds": ["739"]}}
  168. assert http_client.requests[-1]["json"] == {
  169. "params": {"configId": 58, "channelContentIds": ["739"]}
  170. }
  171. class _FakeResponse:
  172. def __init__(self, data):
  173. self.data = data
  174. def raise_for_status(self):
  175. return None
  176. def json(self):
  177. return self.data
  178. class _FakeHttpClient:
  179. def __init__(self, data):
  180. self.data = data
  181. self.requests = []
  182. def post(self, *args, **kwargs):
  183. self.requests.append({"args": args, **kwargs})
  184. return _FakeResponse(self.data)
  185. def _raise_runtime_error():
  186. raise RuntimeError("decode result unavailable")
  187. def test_decode_content_without_event_sink_keeps_previous_result():
  188. kwargs = dict(
  189. content={"platform_content_id": "739"},
  190. media={"play_url": None},
  191. source_context={},
  192. max_wait_seconds=1200,
  193. poll_interval_seconds=0,
  194. )
  195. without_sink = decode_content(decode_client=FakeDecodeClient(fake_decode_success()), **kwargs)
  196. events = []
  197. with_sink = decode_content(
  198. decode_client=FakeDecodeClient(fake_decode_success()),
  199. event_sink=events.append,
  200. **kwargs,
  201. )
  202. assert without_sink == with_sink
  203. assert events # sink 只新增观测,不改变业务返回。