hai 3 meses · 6e99378759
--- a/examples/test_skill.py
+++ b/examples/test_skill.py
@@ -0,0 +1,129 @@
 
															+import json
														
 
															+import subprocess
														
 
															+import time
														
 
															+from pathlib import Path
														
 
															+
														
 
															+
														
 
															+def run_cli(session: str, args: list[str]) -> dict:
														
 
															+    command = ["browser-use", "--session", session, "--json"] + args
														
 
															+    result = subprocess.run(command, capture_output=True, text=True)
														
 
															+    if result.returncode != 0:
														
 
															+        raise RuntimeError(result.stderr.strip() or "browser-use command failed")
														
 
															+    payload = result.stdout.strip()
														
 
															+    if not payload:
														
 
															+        raise RuntimeError("browser-use returned empty output")
														
 
															+    data = json.loads(payload)
														
 
															+    if not data.get("success", False):
														
 
															+        raise RuntimeError(data.get("error", "browser-use command error"))
														
 
															+    return data.get("data", {})
														
 
															+
														
 
															+
														
 
															+def stop_session_server(session: str) -> None:
														
 
															+    subprocess.run(
														
 
															+        ["browser-use", "--session", session, "server", "stop"],
														
 
															+        capture_output=True,
														
 
															+        text=True,
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def main():
														
 
															+    project_root = Path(__file__).resolve().parents[1]
														
 
															+    output_dir = project_root / "output"
														
 
															+    output_dir.mkdir(parents=True, exist_ok=True)
														
 
															+
														
 
															+    json_file = output_dir / "skill_baidu.json"
														
 
															+    html_file = output_dir / "skill_baidu_page.html"
														
 
															+
														
 
															+    session = "skill_baidu"
														
 
															+    keyword = "瑜伽美女"
														
 
															+
														
 
															+    try:
														
 
															+        stop_session_server(session)
														
 
															+        try:
														
 
															+            run_cli(session, ["open", "https://www.baidu.com"])
														
 
															+        except RuntimeError:
														
 
															+            stop_session_server(session)
														
 
															+            run_cli(session, ["open", "https://www.baidu.com"])
														
 
															+
														
 
															+        search_js = (
														
 
															+            "(function(){"
														
 
															+            "const input=document.querySelector('#kw');"
														
 
															+            "const btn=document.querySelector('#su');"
														
 
															+            "if(input){input.value='" + keyword + "';}"
														
 
															+            "if(btn){btn.click();}"
														
 
															+            "else if(input&&input.form){input.form.submit();}"
														
 
															+            "return {hasInput:!!input,hasButton:!!btn};"
														
 
															+            "})()"
														
 
															+        )
														
 
															+        run_cli(session, ["eval", search_js])
														
 
															+
														
 
															+        wait_js = (
														
 
															+            "(function(){"
														
 
															+            "const items=document.querySelectorAll('#content_left .result, #content_left .c-container, #content_left .result-op');"
														
 
															+            "const bodyReady=!!document.body;"
														
 
															+            "const bodyLen=bodyReady?(document.body.innerText||'').length:0;"
														
 
															+            "return {count:items.length, bodyReady:bodyReady, bodyLen:bodyLen};"
														
 
															+            "})()"
														
 
															+        )
														
 
															+
														
 
															+        count = 0
														
 
															+        for _ in range(12):
														
 
															+            data = run_cli(session, ["eval", wait_js])
														
 
															+            result = data.get("result") if isinstance(data, dict) else {}
														
 
															+            count = int(result.get("count") or 0)
														
 
															+            body_len = int(result.get("bodyLen") or 0)
														
 
															+            if count >= 3 or body_len > 1000:
														
 
															+                break
														
 
															+            time.sleep(1)
														
 
															+
														
 
															+        extract_js = (
														
 
															+            "(function(){"
														
 
															+            "const items=Array.from(document.querySelectorAll('#content_left .result, #content_left .c-container, #content_left .result-op'));"
														
 
															+            "const results=[];"
														
 
															+            "for(const item of items){"
														
 
															+            "const a=item.querySelector('h3 a')||item.querySelector('a[data-click]')||item.querySelector('a');"
														
 
															+            "if(!a) continue;"
														
 
															+            "const title=(a.textContent||'').trim();"
														
 
															+            "const link=a.href||'';"
														
 
															+            "const summaryEl=item.querySelector('.c-abstract, .content-right_8Zs40, .content-right_8Zs40_2gVt2');"
														
 
															+            "const summary=(summaryEl?summaryEl.textContent:'').trim();"
														
 
															+            "results.push({index:results.length+1,title,link,summary});"
														
 
															+            "if(results.length>=10) break;"
														
 
															+            "}"
														
 
															+            "return {success:true,keyword:'" + keyword + "',count:results.length,timestamp:new Date().toISOString(),results:results};"
														
 
															+            "})()"
														
 
															+        )
														
 
															+
														
 
															+        data = run_cli(session, ["eval", extract_js])
														
 
															+        extracted = data.get("result") if isinstance(data, dict) else data
														
 
															+
														
 
															+        if not extracted:
														
 
															+            extracted = {
														
 
															+                "success": False,
														
 
															+                "keyword": keyword,
														
 
															+                "count": 0,
														
 
															+                "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
														
 
															+                "results": [],
														
 
															+            }
														
 
															+
														
 
															+        with open(json_file, "w", encoding="utf-8") as f:
														
 
															+            json.dump(extracted, f, ensure_ascii=False, indent=2)
														
 
															+
														
 
															+        html_data = run_cli(session, ["eval", "document.documentElement.outerHTML"])
														
 
															+        html_content = html_data.get("result") if isinstance(html_data, dict) else html_data
														
 
															+
														
 
															+        with open(html_file, "w", encoding="utf-8") as f:
														
 
															+            f.write(html_content or "")
														
 
															+
														
 
															+        print(f"✅ 数据已保存到: {json_file}")
														
 
															+        print(f"✅ HTML 已保存到: {html_file}")
														
 
															+
														
 
															+    finally:
														
 
															+        try:
														
 
															+            run_cli(session, ["close"])
														
 
															+        except Exception:
														
 
															+            pass
														
 
															+
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    main()
														
--- a/examples/test_tools_xhs.py
+++ b/examples/test_tools_xhs.py
@@ -1,247 +0,0 @@
 
															-import asyncio
														
 
															-import json
														
 
															-import os
														
 
															-import sys
														
 
															-from datetime import datetime
														
 
															-from pathlib import Path
														
 
															-from urllib.parse import quote
														
 
															-
														
 
															-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
														
 
															-
														
 
															-from tools.baseClassTools import (
														
 
															-    init_browser_session,
														
 
															-    navigate_to_url,
														
 
															-    wait,
														
 
															-    get_page_html,
														
 
															-    evaluate,
														
 
															-    scroll_page,
														
 
															-    cleanup_browser_session,
														
 
															-)
														
 
															-
														
 
															-
														
 
															-async def run_task():
														
 
															-    project_root = Path(__file__).resolve().parents[1]
														
 
															-    output_dir = project_root / "output"
														
 
															-    output_dir.mkdir(parents=True, exist_ok=True)
														
 
															-
														
 
															-    json_file = output_dir / "xhs.json"
														
 
															-    html_file = output_dir / "xhs_page.html"
														
 
															-
														
 
															-    def normalize_output(raw: str) -> str:
														
 
															-        value = raw
														
 
															-        if value.startswith("Result: "):
														
 
															-            value = value[8:]
														
 
															-        return value.strip()
														
 
															-
														
 
															-    try:
														
 
															-        await init_browser_session(headless=False, profile_name="xhs_profile")
														
 
															-
														
 
															-        await navigate_to_url("https://www.xiaohongshu.com")
														
 
															-        await wait(seconds=3)
														
 
															-
														
 
															-        keyword = "瑜伽美女"
														
 
															-        search_url = f"https://www.xiaohongshu.com/search_result?keyword={quote(keyword)}&type=51"
														
 
															-        await navigate_to_url(search_url)
														
 
															-        await wait(seconds=6)
														
 
															-
														
 
															-        unlock_js = """
														
 
															-        (function(){
														
 
															-            try {
														
 
															-                document.documentElement.classList.remove('reds-lock-scroll');
														
 
															-                document.body.classList.remove('reds-lock-scroll');
														
 
															-                const candidates = Array.from(document.querySelectorAll('[role="dialog"], .reds-modal, .reds-alert, [class*="modal"], [class*="mask"], [class*="dialog"]'));
														
 
															-                for (const el of candidates) {
														
 
															-                    try {
														
 
															-                        const style = window.getComputedStyle(el);
														
 
															-                        const z = parseInt(style.zIndex || '0', 10);
														
 
															-                        if (style.position === 'fixed' && z >= 999) {
														
 
															-                            el.remove();
														
 
															-                        }
														
 
															-                    } catch {}
														
 
															-                }
														
 
															-                const closeButtons = Array.from(document.querySelectorAll('button, [role="button"]'));
														
 
															-                for (const btn of closeButtons) {
														
 
															-                    const text = (btn.textContent || '').trim();
														
 
															-                    const label = (btn.getAttribute('aria-label') || '').trim();
														
 
															-                    if (text.includes('关闭') || text.includes('我知道了') || text.includes('同意') || label.includes('关闭')) {
														
 
															-                        btn.click();
														
 
															-                    }
														
 
															-                }
														
 
															-                return true;
														
 
															-            } catch (e) {
														
 
															-                return false;
														
 
															-            }
														
 
															-        })()
														
 
															-        """
														
 
															-
														
 
															-        count_js = """
														
 
															-        (function(){
														
 
															-            const anchorCount = document.querySelectorAll('a[href*="/explore/"]').length;
														
 
															-            const cardCount = document.querySelectorAll('[data-testid="search-note-item"], .note-item, article, li[data-note-id]').length;
														
 
															-            return JSON.stringify({count: Math.max(anchorCount, cardCount)});
														
 
															-        })()
														
 
															-        """
														
 
															-
														
 
															-        await evaluate(code=unlock_js)
														
 
															-
														
 
															-        for _ in range(8):
														
 
															-            count_result = await evaluate(code=count_js)
														
 
															-            count_output = normalize_output(count_result.output)
														
 
															-            try:
														
 
															-                count_value = int(json.loads(count_output).get("count", 0))
														
 
															-            except Exception:
														
 
															-                count_value = 0
														
 
															-            if count_value >= 5:
														
 
															-                break
														
 
															-            await scroll_page(down=True, pages=0.8)
														
 
															-            await wait(seconds=3)
														
 
															-            await evaluate(code=unlock_js)
														
 
															-
														
 
															-        extract_js = """
														
 
															-        (function(){
														
 
															-            try {
														
 
															-                const results = [];
														
 
															-
														
 
															-                const jsonScripts = Array.from(document.querySelectorAll('script[type="application/json"], script#__NEXT_DATA__'));
														
 
															-                for (const s of jsonScripts) {
														
 
															-                    try {
														
 
															-                        const txt = s.textContent.trim();
														
 
															-                        if (txt && txt.length > 0) {
														
 
															-                            const data = JSON.parse(txt);
														
 
															-                            const candidates = [];
														
 
															-                            function collect(obj) {
														
 
															-                                if (!obj || typeof obj !== 'object') return;
														
 
															-                                for (const k of Object.keys(obj)) {
														
 
															-                                    const v = obj[k];
														
 
															-                                    if (v && typeof v === 'object') {
														
 
															-                                        if (Array.isArray(v)) {
														
 
															-                                            candidates.push(v);
														
 
															-                                        }
														
 
															-                                        collect(v);
														
 
															-                                    }
														
 
															-                                }
														
 
															-                            }
														
 
															-                            collect(data);
														
 
															-                            for (const arr of candidates) {
														
 
															-                                for (const item of arr) {
														
 
															-                                    try {
														
 
															-                                        const title = (item.title || item.noteTitle || item.name || '').toString().trim();
														
 
															-                                        const link = (item.link || item.url || item.noteUrl || item.jumpUrl || '').toString().trim();
														
 
															-                                        if ((title || link) && (link.includes('/explore/') || link.startsWith('http'))) {
														
 
															-                                            results.push({
														
 
															-                                                index: results.length + 1,
														
 
															-                                                title,
														
 
															-                                                link,
														
 
															-                                                summary: (item.desc || item.content || item.noteDesc || '').toString().trim().substring(0, 200)
														
 
															-                                            });
														
 
															-                                            if (results.length >= 20) break;
														
 
															-                                        }
														
 
															-                                    } catch {}
														
 
															-                                }
														
 
															-                                if (results.length >= 20) break;
														
 
															-                            }
														
 
															-                        }
														
 
															-                    } catch {}
														
 
															-                    if (results.length >= 5) break;
														
 
															-                }
														
 
															-
														
 
															-                if (results.length < 5) {
														
 
															-                    const anchors = Array.from(document.querySelectorAll('a[href*="/explore/"]'));
														
 
															-                    const seen = new Set();
														
 
															-                    for (const a of anchors) {
														
 
															-                        try {
														
 
															-                            const href = a.href;
														
 
															-                            if (!href || seen.has(href)) continue;
														
 
															-                            seen.add(href);
														
 
															-                            let title = (a.textContent || '').trim();
														
 
															-                            if (!title) {
														
 
															-                                const img = a.querySelector('img[alt]');
														
 
															-                                if (img && img.alt) title = img.alt.trim();
														
 
															-                            }
														
 
															-                            if (!title) {
														
 
															-                                const parentTitle = a.closest('[data-testid="search-note-item"], .note-item, article, li')?.querySelector('[data-testid="note-title"], .title, h3, p');
														
 
															-                                if (parentTitle) title = (parentTitle.textContent || '').trim();
														
 
															-                            }
														
 
															-                            const descEl = a.closest('[data-testid="search-note-item"], .note-item, article, li')?.querySelector('[data-testid="note-desc"], .desc, .description, p');
														
 
															-                            const desc = descEl ? (descEl.textContent || '').trim() : '';
														
 
															-                            results.push({
														
 
															-                                index: results.length + 1,
														
 
															-                                title,
														
 
															-                                link: href,
														
 
															-                                summary: desc.substring(0, 200)
														
 
															-                            });
														
 
															-                            if (results.length >= 20) break;
														
 
															-                        } catch {}
														
 
															-                    }
														
 
															-                }
														
 
															-
														
 
															-                return {
														
 
															-                    success: true,
														
 
															-                    count: results.length,
														
 
															-                    keyword: '瑜伽美女',
														
 
															-                    timestamp: new Date().toISOString(),
														
 
															-                    results: results
														
 
															-                };
														
 
															-            } catch (e) {
														
 
															-                return {
														
 
															-                    success: false,
														
 
															-                    error: e.message,
														
 
															-                    stack: e.stack
														
 
															-                };
														
 
															-            }
														
 
															-        })()
														
 
															-        """
														
 
															-
														
 
															-        result = await evaluate(code=extract_js)
														
 
															-        output = normalize_output(result.output)
														
 
															-
														
 
															-        try:
														
 
															-            data = json.loads(output)
														
 
															-        except json.JSONDecodeError:
														
 
															-            data = {
														
 
															-                "success": False,
														
 
															-                "error": "JSON解析失败",
														
 
															-                "raw_output": output[:1000],
														
 
															-                "keyword": keyword,
														
 
															-                "timestamp": datetime.now().isoformat(),
														
 
															-            }
														
 
															-
														
 
															-        with open(json_file, "w", encoding="utf-8") as f:
														
 
															-            json.dump(data, f, ensure_ascii=False, indent=2)
														
 
															-
														
 
															-        html_result = await get_page_html()
														
 
															-        html_content = html_result.metadata.get("html", "")
														
 
															-        page_url = html_result.metadata.get("url", "")
														
 
															-        page_title = html_result.metadata.get("title", "")
														
 
															-        meta_info = (
														
 
															-            "\n".join(
														
 
															-                [
														
 
															-                    "<!--",
														
 
															-                    f"    页面标题: {page_title}",
														
 
															-                    f"    页面URL: {page_url}",
														
 
															-                    f"    保存时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
														
 
															-                    f"    搜索关键词: {keyword}",
														
 
															-                    "-->",
														
 
															-                    "",
														
 
															-                ]
														
 
															-            )
														
 
															-            + "\n"
														
 
															-        )
														
 
															-
														
 
															-        with open(html_file, "w", encoding="utf-8") as f:
														
 
															-            f.write(meta_info)
														
 
															-            f.write(html_content)
														
 
															-
														
 
															-        print(f"✅ 数据已保存到: {json_file}")
														
 
															-        print(f"✅ HTML 已保存到: {html_file}")
														
 
															-
														
 
															-    finally:
														
 
															-        await cleanup_browser_session()
														
 
															-
														
 
															-
														
 
															-def main():
														
 
															-    asyncio.run(run_task())
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    main()