"""
从帖子解构内容中提取选题点(灵感点/目的点/关键点 下 分词结果 中的 词),去重后输出。
"""
import json
import argparse
from pathlib import Path


  7. def extract_post_topic(account_name: str, post_id: str) -> list[str]:
  8. """
  9. 从解构内容中提取选题点并去重。
  10. :param account_name: 账号名
  11. :param post_id: 帖子ID
  12. :return: 去重后的选题点字符串列表
  13. """
  14. base = Path(__file__).resolve().parent
  15. input_path = base / "input" / account_name / "原始数据" / "解构内容" / f"{post_id}.json"
  16. with open(input_path, "r", encoding="utf-8") as f:
  17. data = json.load(f)
  18. topics: list[str] = []
  19. for key in ("灵感点", "目的点", "关键点"):
  20. for item in data.get(key, []):
  21. for seg in item.get("分词结果", []):
  22. word = seg.get("词")
  23. if word and isinstance(word, str) and word.strip():
  24. topics.append(word.strip())
  25. # 去重且保持首次出现顺序
  26. seen = set()
  27. unique_topics: list[str] = []
  28. for w in topics:
  29. if w not in seen:
  30. seen.add(w)
  31. unique_topics.append(w)
  32. return unique_topics
  33. def main(account_name: str, post_id: str):
  34. # parser = argparse.ArgumentParser(description="从解构内容中提取选题点")
  35. # parser.add_argument("account_name", help="账号名")
  36. # parser.add_argument("post_id", help="帖子ID")
  37. # args = parser.parse_args()
  38. topics = extract_post_topic(account_name, post_id)
  39. base = Path(__file__).resolve().parent
  40. out_dir = base / "input" / account_name / "post_topic"
  41. out_dir.mkdir(parents=True, exist_ok=True)
  42. out_path = out_dir / f"{post_id}.json"
  43. with open(out_path, "w", encoding="utf-8") as f:
  44. json.dump(topics, f, ensure_ascii=False, indent=2)
  45. print(f"已写入 {len(topics)} 个选题点到 {out_path}")
  46. if __name__ == "__main__":
  47. main(account_name="家有大志", post_id="69185d49000000000d00f94e")