"""Tests for search-query generation: seed terms, LLM variants and profile overrides."""

import copy

import pytest

from content_agent.business_modules import search_intent
from content_agent.errors import ContentAgentError
from content_agent.integrations.query_prompt_config import DEFAULT_PROFILE
from content_agent.run_service import RunService
from content_agent.schemas import RunStartRequest
from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE

# Terms that must never show up in seed terms or generated search queries.
FORBIDDEN_FIXED_BUSINESS_TERMS = [
    "\u8d2a\u8150",
    "\u57fa\u5c42\u516c\u804c\u4eba\u5458",
    "\u6848\u4f8b",
    "\u89e3\u8bfb",
    "\u8b66\u793a",
]


class _Runtime:
    """In-memory runtime stand-in that remembers the rows written per filename."""

    def __init__(self):
        self.rows = {}

    def append_jsonl(self, _run_id, filename, rows):
        """Store ``rows`` under ``filename``; the run id is ignored."""
        self.rows[filename] = rows


def _seed_pack():
    """Return a fresh pattern seed pack with a single seed term."""
    pack = {
        "seed_terms": ["中医养生"],
        "itemset_items": ["补气血"],
        "category_bindings": [{"category_id": "c1"}],
        "element_bindings": [{"element_id": "e1"}],
        "pattern_source_system": "pg_pattern_v2",
        "pattern_execution_id": 1987,
        "mining_config_id": 58,
        "source_post_id": "60219550",
        "matched_post_ids": ["60219550"],
        "itemset_ids": [1607977],
        "support": 0.2,
        "absolute_support": 31,
        "confidence": 0.8,
    }
    return pack


def _start_mock_run(tmp_path):
    """Start a mock-platform run on the real source fixture.

    Returns the ``(service, state)`` pair so callers can read run artifacts.
    """
    variant_client = FakeQueryVariantClient(
        {
            "爱国情感": "家国叙事素材",
            "人物故事": "榜样人物素材",
        }
    )
    service = RunService(
        runtime_root=tmp_path / "runtime" / "v1",
        query_variant_client=variant_client,
    )
    request = RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
    state = service.start_run(request)
    return service, state


def _select_seed_and_variant_queries(queries):
    """Keep only the rows generated by the ``item_single`` / ``llm_variant`` methods."""
    wanted_methods = {"item_single", "llm_variant"}
    selected = []
    for entry in queries:
        if entry["search_query_generation_method"] in wanted_methods:
            selected.append(entry)
    return selected


def _client_with_profile_override(variants, key, value):
    """Build a fake variant client whose profile is the default with one key replaced."""
    client = FakeQueryVariantClient(variants)
    client.profile = copy.deepcopy(DEFAULT_PROFILE)
    client.profile[key] = value
    return client


def test_search_seed_and_queries_do_not_inject_fixed_business_terms(tmp_path):
    """Seed terms and generated queries come from the source, free of forbidden terms."""
    service, state = _start_mock_run(tmp_path)
    run_id = state["run_id"]

    pattern_seed_pack = service.read_json(run_id, "pattern_seed_pack.json")
    all_queries = service.read_jsonl(run_id, "search_queries.jsonl")
    p2_queries = _select_seed_and_variant_queries(all_queries)

    assert pattern_seed_pack["seed_terms"] == ["爱国情感", "人物故事"]

    query_ids = [entry["search_query_id"] for entry in p2_queries]
    assert query_ids == ["q_001", "q_002", "q_003", "q_004"]

    query_texts = [entry["search_query"] for entry in p2_queries]
    assert query_texts == [
        "爱国情感",
        "家国叙事素材",
        "人物故事",
        "榜样人物素材",
    ]

    methods = [entry["search_query_generation_method"] for entry in p2_queries]
    assert methods == [
        "item_single",
        "llm_variant",
        "item_single",
        "llm_variant",
    ]

    # Each LLM variant must point back at the single-item query it came from.
    assert p2_queries[1]["llm_variant_of"] == "q_001"
    assert p2_queries[3]["llm_variant_of"] == "q_003"

    checked_values = list(pattern_seed_pack["seed_terms"])
    checked_values += [entry["search_query"] for entry in p2_queries]
    for text in checked_values:
        assert all(term not in text for term in FORBIDDEN_FIXED_BUSINESS_TERMS)


def test_search_queries_preserve_source_terms_for_replay(tmp_path):
    """Every query row carries its originating seed term and LLM provenance."""
    service, state = _start_mock_run(tmp_path)
    all_queries = service.read_jsonl(state["run_id"], "search_queries.jsonl")
    p2_queries = _select_seed_and_variant_queries(all_queries)

    expected_source_terms = [["爱国情感"], ["爱国情感"], ["人物故事"], ["人物故事"]]
    for entry, terms in zip(p2_queries, expected_source_terms, strict=True):
        seed_term = terms[0]
        assert entry["query_source_terms"] == terms
        assert entry["query_source_fields"] == ["seed_terms"]
        assert entry["raw_payload"]["query_source_terms"] == terms
        assert entry["pattern_seed_ref"]["source_field"] == "seed_terms"
        assert entry["pattern_seed_ref"]["seed_term"] == seed_term
        assert entry["raw_payload"]["pattern_seed_ref"]["seed_term"] == seed_term

    llm_queries = []
    for entry in p2_queries:
        if entry["search_query_generation_method"] == "llm_variant":
            llm_queries.append(entry)
    assert len(llm_queries) == 2

    for entry in llm_queries:
        payload = entry["raw_payload"]
        assert payload["llm_prompt_version"] == "fake-query-prompt-v1"
        assert payload["llm_generation_model"] == "fake-query-model"
        assert payload["llm_input_evidence"]["source_field"] == "seed_terms"
        assert payload["llm_input_evidence"]["itemset_items"]


def test_search_intent_custom_evidence_fields_whitelist():
    """A custom ``evidence_fields`` profile limits the LLM evidence keys to that list."""
    allowed_fields = ["seed_term", "support"]
    client = _client_with_profile_override(
        {"中医养生": "气血食疗"}, "evidence_fields", ["seed_term", "support"]
    )
    runtime = _Runtime()

    queries = search_intent.run("run_1", "policy_1", _seed_pack(), runtime, client)

    llm_rows = [
        entry
        for entry in queries
        if entry["search_query_generation_method"] == "llm_variant"
    ]
    llm_query = llm_rows[0]

    assert list(llm_query["llm_input_evidence"].keys()) == allowed_fields
    assert list(llm_query["raw_payload"]["llm_input_evidence"].keys()) == allowed_fields
    assert llm_query["query_source_fields"] == ["seed_terms"]


def test_search_intent_custom_generic_filter_blocks_query():
    """A variant listed in the profile's generic filter aborts query generation."""
    client = _client_with_profile_override(
        {"中医养生": "禁用泛词"},
        "generic_filter",
        {"queries": ["禁用泛词"], "tokens": []},
    )

    with pytest.raises(ContentAgentError) as exc_info:
        search_intent.run("run_1", "policy_1", _seed_pack(), _Runtime(), client)

    error = exc_info.value
    assert error.error_code == "QUERY_GENERATION_FAILED"
    assert error.detail["reason"] == "llm_variant_generic"


def test_search_intent_rejects_unsupported_variants_per_seed():
    """Setting ``variants_per_seed`` to 2 in the profile is rejected as unsupported."""
    client = _client_with_profile_override(
        {"中医养生": "气血食疗"}, "variants_per_seed", 2
    )

    with pytest.raises(ContentAgentError) as exc_info:
        search_intent.run("run_1", "policy_1", _seed_pack(), _Runtime(), client)

    error = exc_info.value
    assert error.error_code == "QUERY_GENERATION_FAILED"
    assert error.detail == {"reason": "variants_per_seed_unsupported", "variants_per_seed": 2}