#!/usr/bin/env python3
"""
探查 output-new 文件夹：
 1. 99 folders 是否一一对应到现有 v0 的 REQ_xxx（按 requirement 文本匹配）
 2. 统计每 folder 的 cap / strat / resource 数量
 3. 检测有无异常（req 文本缺失、JSON schema 不规范等）
"""
import json, sys
from pathlib import Path
# Prepend the directory two levels above this script's own directory to the
# import path, so the `knowhub` import below resolves when the file is run
# directly (presumably that directory is the repository root — confirm).
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore

# Directory whose immediate sub-folders are scanned by main().
# NOTE(review): hard-coded, machine-specific absolute path.
OUTPUT = Path('/Users/sunlit/Downloads/output-new')


def _load_v0_requirements():
    """Return a ``{description: id}`` mapping of every ``requirement`` row.

    The cursor and the store are released in separate ``finally`` clauses so
    the store is still closed when creating or closing the cursor fails.
    """
    store = PostgreSQLCapabilityStore()
    try:
        cur = store._get_cursor()
        try:
            cur.execute('SELECT id, description FROM requirement')
            # Rows are indexed by column name; rows that share a description
            # collapse into a single dict entry.
            return {row['description']: row['id'] for row in cur.fetchall()}
        finally:
            cur.close()
    finally:
        store.close()


def _read_json_object(path):
    """Parse *path* as UTF-8 JSON and return it; it must be a JSON object.

    Raises:
        OSError: the file cannot be read.
        ValueError: the content is not valid UTF-8 / JSON, or the top-level
            value is not an object (dict).
    """
    data = json.loads(path.read_text(encoding='utf-8'))
    if not isinstance(data, dict):
        raise ValueError(
            f'{path.name}: expected a JSON object, got {type(data).__name__}')
    return data


def _count_strategy_cap_links(strategies):
    """Sum the ``capabilities`` lengths over every workflow phase.

    Strategies and phases that are not dicts are ignored, and a missing or
    null ``workflow_outline`` / ``capabilities`` counts as empty.
    """
    links = 0
    for strategy in strategies:
        if not isinstance(strategy, dict):
            continue
        for phase in strategy.get('workflow_outline') or []:
            if isinstance(phase, dict):
                links += len(phase.get('capabilities') or [])
    return links


def _count_raw_cases(raw_cases_dir):
    """Count ``cases`` entries across the files directly in *raw_cases_dir*.

    Returns:
        ``(case_count, skipped)`` where *skipped* lists the names of entries
        that could not be read, parsed, or counted. A path that is missing or
        is not a directory yields ``(0, [])``.
    """
    case_count = 0
    skipped = []
    if not raw_cases_dir.is_dir():
        return case_count, skipped
    for case_file in sorted(raw_cases_dir.iterdir()):
        try:
            payload = json.loads(case_file.read_text(encoding='utf-8'))
            case_count += len(payload.get('cases') or [])
        except (OSError, ValueError, AttributeError, TypeError):
            # Best effort, as before: sub-directories, non-JSON files and
            # unexpected shapes do not abort the scan, but they are recorded
            # so the total is not silently understated.
            skipped.append(case_file.name)
    return case_count, skipped


def main(output_dir=None):
    """Compare the output folders with the requirements in the DB and print totals.

    Every immediate sub-folder must contain ``strategy.json`` and
    ``capabilities_extracted.json``. A folder is "matched" when its
    ``requirement`` text equals the ``description`` of a row in the
    ``requirement`` table; anything else is listed as unmatched with a reason.
    For matched folders the capabilities, strategies, strategy->capability
    links and raw cases are summed and printed.

    Args:
        output_dir: Directory to scan. Defaults to the module-level ``OUTPUT``.
    """
    root = OUTPUT if output_dir is None else Path(output_dir)

    v0_reqs = _load_v0_requirements()
    print(f'v0 reqs in DB: {len(v0_reqs)}', flush=True)

    folders = sorted(f for f in root.iterdir() if f.is_dir())
    print(f'tao_dev folders: {len(folders)}', flush=True)

    matched, unmatched = 0, []
    cap_total = strat_total = res_total = 0
    cap_is_new_true = 0
    strat_cap_links = 0
    raw_skipped = []
    for folder in folders:
        strategy_path = folder / 'strategy.json'
        caps_path = folder / 'capabilities_extracted.json'
        if not strategy_path.exists() or not caps_path.exists():
            unmatched.append((folder.name, 'missing file'))
            continue
        try:
            sd = _read_json_object(strategy_path)
            cd = _read_json_object(caps_path)
        except (OSError, ValueError) as e:
            unmatched.append((folder.name, f'parse: {e}'))
            continue

        req_text = sd.get('requirement') or cd.get('requirement')
        if not isinstance(req_text, str):
            # Missing (or non-text) requirement: report it instead of
            # crashing when slicing None for the message below.
            unmatched.append((folder.name, 'missing requirement text'))
            continue
        if req_text not in v0_reqs:
            unmatched.append((folder.name, f'no match: {req_text[:40]!r}...'))
            continue
        matched += 1

        caps = cd.get('extracted_capabilities') or []
        cap_total += len(caps)
        cap_is_new_true += sum(
            1 for c in caps if isinstance(c, dict) and c.get('is_new'))

        strats = sd.get('strategies') or []
        strat_total += len(strats)
        strat_cap_links += _count_strategy_cap_links(strats)

        case_count, skipped = _count_raw_cases(folder / 'raw_cases')
        res_total += case_count
        raw_skipped.extend(f'{folder.name}/{name}' for name in skipped)

    print(f'\n匹配到 v0 req: {matched}/{len(folders)}', flush=True)
    print(f'未匹配: {len(unmatched)}', flush=True)
    for n, why in unmatched[:10]:
        print(f'  {n}: {why}', flush=True)
    if len(unmatched) > 10:
        print(f'  ... (+{len(unmatched) - 10} more)', flush=True)
    print(f'\n总计:', flush=True)
    print(f'  extracted_capabilities: {cap_total}  (其中 is_new=true: {cap_is_new_true})', flush=True)
    print(f'  strategies: {strat_total}  (平均 {strat_total/max(matched,1):.1f}/req)', flush=True)
    print(f'  strategy→cap 链接: {strat_cap_links}', flush=True)
    print(f'  raw_cases 案例: {res_total}  (平均 {res_total/max(matched,1):.1f}/req)', flush=True)
    if raw_skipped:
        print(f'  raw_cases 无法解析的文件: {len(raw_skipped)}', flush=True)
        for name in raw_skipped[:10]:
            print(f'    {name}', flush=True)


# Run the scan only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()