Просмотр исходного кода

feat(examples): 新增百度搜索示例并移除小红书示例

新增 test_skill.py 示例,演示使用 browser-use CLI 进行百度搜索并提取结果。
移除旧的 test_tools_xhs.py 示例,因其使用已废弃的异步工具类。
max_liu 1 месяц назад
Родитель
Commit
6e99378759
2 измененных файла: 129 добавлений и 247 удалений
  1. 129 0
      examples/test_skill.py
  2. 0 247
      examples/test_tools_xhs.py

+ 129 - 0
examples/test_skill.py

@@ -0,0 +1,129 @@
+import json
+import subprocess
+import time
+from pathlib import Path
+
+
+def run_cli(session: str, args: list[str]) -> dict:
+    command = ["browser-use", "--session", session, "--json"] + args
+    result = subprocess.run(command, capture_output=True, text=True)
+    if result.returncode != 0:
+        raise RuntimeError(result.stderr.strip() or "browser-use command failed")
+    payload = result.stdout.strip()
+    if not payload:
+        raise RuntimeError("browser-use returned empty output")
+    data = json.loads(payload)
+    if not data.get("success", False):
+        raise RuntimeError(data.get("error", "browser-use command error"))
+    return data.get("data", {})
+
+
def stop_session_server(session: str) -> None:
    """Best-effort shutdown of the session's browser-use server.

    Captures all output and ignores the exit status, so calling this when
    the server is already stopped (or never started) does not raise.
    """
    stop_cmd = ["browser-use", "--session", session, "server", "stop"]
    subprocess.run(stop_cmd, capture_output=True, text=True)
+
+
def main():
    """Search Baidu for a fixed keyword via the browser-use CLI and save results.

    Flow: restart the browser session, open baidu.com, submit the search form
    through injected JavaScript, poll until result nodes render (up to ~12s),
    extract at most 10 results to ``output/skill_baidu.json``, and dump the
    full page HTML to ``output/skill_baidu_page.html``. The session is closed
    best-effort in ``finally``.
    """
    project_root = Path(__file__).resolve().parents[1]
    output_dir = project_root / "output"
    output_dir.mkdir(parents=True, exist_ok=True)

    json_file = output_dir / "skill_baidu.json"
    html_file = output_dir / "skill_baidu_page.html"

    session = "skill_baidu"
    keyword = "瑜伽美女"
    # Serialize the keyword into a proper JS string literal so quotes,
    # backslashes or other metacharacters cannot break (or inject into)
    # the generated scripts below.
    keyword_js = json.dumps(keyword, ensure_ascii=False)

    try:
        stop_session_server(session)
        try:
            run_cli(session, ["open", "https://www.baidu.com"])
        except RuntimeError:
            # A stale/half-dead session server can make the first open fail;
            # restart it once and retry.
            stop_session_server(session)
            run_cli(session, ["open", "https://www.baidu.com"])

        search_js = (
            "(function(){"
            "const input=document.querySelector('#kw');"
            "const btn=document.querySelector('#su');"
            "if(input){input.value=" + keyword_js + ";}"
            "if(btn){btn.click();}"
            "else if(input&&input.form){input.form.submit();}"
            "return {hasInput:!!input,hasButton:!!btn};"
            "})()"
        )
        run_cli(session, ["eval", search_js])

        wait_js = (
            "(function(){"
            "const items=document.querySelectorAll('#content_left .result, #content_left .c-container, #content_left .result-op');"
            "const bodyReady=!!document.body;"
            "const bodyLen=bodyReady?(document.body.innerText||'').length:0;"
            "return {count:items.length, bodyReady:bodyReady, bodyLen:bodyLen};"
            "})()"
        )

        # Poll up to 12 times, 1s apart, until enough result nodes exist or
        # the body is clearly populated (SPA pages may render late).
        for _ in range(12):
            data = run_cli(session, ["eval", wait_js])
            result = data.get("result") if isinstance(data, dict) else {}
            count = int(result.get("count") or 0)
            body_len = int(result.get("bodyLen") or 0)
            if count >= 3 or body_len > 1000:
                break
            time.sleep(1)

        extract_js = (
            "(function(){"
            "const items=Array.from(document.querySelectorAll('#content_left .result, #content_left .c-container, #content_left .result-op'));"
            "const results=[];"
            "for(const item of items){"
            "const a=item.querySelector('h3 a')||item.querySelector('a[data-click]')||item.querySelector('a');"
            "if(!a) continue;"
            "const title=(a.textContent||'').trim();"
            "const link=a.href||'';"
            "const summaryEl=item.querySelector('.c-abstract, .content-right_8Zs40, .content-right_8Zs40_2gVt2');"
            "const summary=(summaryEl?summaryEl.textContent:'').trim();"
            "results.push({index:results.length+1,title,link,summary});"
            "if(results.length>=10) break;"
            "}"
            "return {success:true,keyword:" + keyword_js + ",count:results.length,timestamp:new Date().toISOString(),results:results};"
            "})()"
        )

        data = run_cli(session, ["eval", extract_js])
        extracted = data.get("result") if isinstance(data, dict) else data

        if not extracted:
            # Extraction returned nothing usable — record an explicit failure
            # payload so the output file always exists with a known schema.
            extracted = {
                "success": False,
                "keyword": keyword,
                "count": 0,
                "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
                "results": [],
            }

        json_file.write_text(
            json.dumps(extracted, ensure_ascii=False, indent=2), encoding="utf-8"
        )

        html_data = run_cli(session, ["eval", "document.documentElement.outerHTML"])
        html_content = html_data.get("result") if isinstance(html_data, dict) else html_data
        html_file.write_text(html_content or "", encoding="utf-8")

        print(f"✅ 数据已保存到: {json_file}")
        print(f"✅ HTML 已保存到: {html_file}")

    finally:
        # Best-effort cleanup: never let a close failure mask the real error.
        try:
            run_cli(session, ["close"])
        except Exception:
            pass


if __name__ == "__main__":
    main()

+ 0 - 247
examples/test_tools_xhs.py

@@ -1,247 +0,0 @@
-import asyncio
-import json
-import os
-import sys
-from datetime import datetime
-from pathlib import Path
-from urllib.parse import quote
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from tools.baseClassTools import (
-    init_browser_session,
-    navigate_to_url,
-    wait,
-    get_page_html,
-    evaluate,
-    scroll_page,
-    cleanup_browser_session,
-)
-
-
-async def run_task():
-    project_root = Path(__file__).resolve().parents[1]
-    output_dir = project_root / "output"
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    json_file = output_dir / "xhs.json"
-    html_file = output_dir / "xhs_page.html"
-
-    def normalize_output(raw: str) -> str:
-        value = raw
-        if value.startswith("Result: "):
-            value = value[8:]
-        return value.strip()
-
-    try:
-        await init_browser_session(headless=False, profile_name="xhs_profile")
-
-        await navigate_to_url("https://www.xiaohongshu.com")
-        await wait(seconds=3)
-
-        keyword = "瑜伽美女"
-        search_url = f"https://www.xiaohongshu.com/search_result?keyword={quote(keyword)}&type=51"
-        await navigate_to_url(search_url)
-        await wait(seconds=6)
-
-        unlock_js = """
-        (function(){
-            try {
-                document.documentElement.classList.remove('reds-lock-scroll');
-                document.body.classList.remove('reds-lock-scroll');
-                const candidates = Array.from(document.querySelectorAll('[role="dialog"], .reds-modal, .reds-alert, [class*="modal"], [class*="mask"], [class*="dialog"]'));
-                for (const el of candidates) {
-                    try {
-                        const style = window.getComputedStyle(el);
-                        const z = parseInt(style.zIndex || '0', 10);
-                        if (style.position === 'fixed' && z >= 999) {
-                            el.remove();
-                        }
-                    } catch {}
-                }
-                const closeButtons = Array.from(document.querySelectorAll('button, [role="button"]'));
-                for (const btn of closeButtons) {
-                    const text = (btn.textContent || '').trim();
-                    const label = (btn.getAttribute('aria-label') || '').trim();
-                    if (text.includes('关闭') || text.includes('我知道了') || text.includes('同意') || label.includes('关闭')) {
-                        btn.click();
-                    }
-                }
-                return true;
-            } catch (e) {
-                return false;
-            }
-        })()
-        """
-
-        count_js = """
-        (function(){
-            const anchorCount = document.querySelectorAll('a[href*="/explore/"]').length;
-            const cardCount = document.querySelectorAll('[data-testid="search-note-item"], .note-item, article, li[data-note-id]').length;
-            return JSON.stringify({count: Math.max(anchorCount, cardCount)});
-        })()
-        """
-
-        await evaluate(code=unlock_js)
-
-        for _ in range(8):
-            count_result = await evaluate(code=count_js)
-            count_output = normalize_output(count_result.output)
-            try:
-                count_value = int(json.loads(count_output).get("count", 0))
-            except Exception:
-                count_value = 0
-            if count_value >= 5:
-                break
-            await scroll_page(down=True, pages=0.8)
-            await wait(seconds=3)
-            await evaluate(code=unlock_js)
-
-        extract_js = """
-        (function(){
-            try {
-                const results = [];
-
-                const jsonScripts = Array.from(document.querySelectorAll('script[type="application/json"], script#__NEXT_DATA__'));
-                for (const s of jsonScripts) {
-                    try {
-                        const txt = s.textContent.trim();
-                        if (txt && txt.length > 0) {
-                            const data = JSON.parse(txt);
-                            const candidates = [];
-                            function collect(obj) {
-                                if (!obj || typeof obj !== 'object') return;
-                                for (const k of Object.keys(obj)) {
-                                    const v = obj[k];
-                                    if (v && typeof v === 'object') {
-                                        if (Array.isArray(v)) {
-                                            candidates.push(v);
-                                        }
-                                        collect(v);
-                                    }
-                                }
-                            }
-                            collect(data);
-                            for (const arr of candidates) {
-                                for (const item of arr) {
-                                    try {
-                                        const title = (item.title || item.noteTitle || item.name || '').toString().trim();
-                                        const link = (item.link || item.url || item.noteUrl || item.jumpUrl || '').toString().trim();
-                                        if ((title || link) && (link.includes('/explore/') || link.startsWith('http'))) {
-                                            results.push({
-                                                index: results.length + 1,
-                                                title,
-                                                link,
-                                                summary: (item.desc || item.content || item.noteDesc || '').toString().trim().substring(0, 200)
-                                            });
-                                            if (results.length >= 20) break;
-                                        }
-                                    } catch {}
-                                }
-                                if (results.length >= 20) break;
-                            }
-                        }
-                    } catch {}
-                    if (results.length >= 5) break;
-                }
-
-                if (results.length < 5) {
-                    const anchors = Array.from(document.querySelectorAll('a[href*="/explore/"]'));
-                    const seen = new Set();
-                    for (const a of anchors) {
-                        try {
-                            const href = a.href;
-                            if (!href || seen.has(href)) continue;
-                            seen.add(href);
-                            let title = (a.textContent || '').trim();
-                            if (!title) {
-                                const img = a.querySelector('img[alt]');
-                                if (img && img.alt) title = img.alt.trim();
-                            }
-                            if (!title) {
-                                const parentTitle = a.closest('[data-testid="search-note-item"], .note-item, article, li')?.querySelector('[data-testid="note-title"], .title, h3, p');
-                                if (parentTitle) title = (parentTitle.textContent || '').trim();
-                            }
-                            const descEl = a.closest('[data-testid="search-note-item"], .note-item, article, li')?.querySelector('[data-testid="note-desc"], .desc, .description, p');
-                            const desc = descEl ? (descEl.textContent || '').trim() : '';
-                            results.push({
-                                index: results.length + 1,
-                                title,
-                                link: href,
-                                summary: desc.substring(0, 200)
-                            });
-                            if (results.length >= 20) break;
-                        } catch {}
-                    }
-                }
-
-                return {
-                    success: true,
-                    count: results.length,
-                    keyword: '瑜伽美女',
-                    timestamp: new Date().toISOString(),
-                    results: results
-                };
-            } catch (e) {
-                return {
-                    success: false,
-                    error: e.message,
-                    stack: e.stack
-                };
-            }
-        })()
-        """
-
-        result = await evaluate(code=extract_js)
-        output = normalize_output(result.output)
-
-        try:
-            data = json.loads(output)
-        except json.JSONDecodeError:
-            data = {
-                "success": False,
-                "error": "JSON解析失败",
-                "raw_output": output[:1000],
-                "keyword": keyword,
-                "timestamp": datetime.now().isoformat(),
-            }
-
-        with open(json_file, "w", encoding="utf-8") as f:
-            json.dump(data, f, ensure_ascii=False, indent=2)
-
-        html_result = await get_page_html()
-        html_content = html_result.metadata.get("html", "")
-        page_url = html_result.metadata.get("url", "")
-        page_title = html_result.metadata.get("title", "")
-        meta_info = (
-            "\n".join(
-                [
-                    "<!--",
-                    f"    页面标题: {page_title}",
-                    f"    页面URL: {page_url}",
-                    f"    保存时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
-                    f"    搜索关键词: {keyword}",
-                    "-->",
-                    "",
-                ]
-            )
-            + "\n"
-        )
-
-        with open(html_file, "w", encoding="utf-8") as f:
-            f.write(meta_info)
-            f.write(html_content)
-
-        print(f"✅ 数据已保存到: {json_file}")
-        print(f"✅ HTML 已保存到: {html_file}")
-
-    finally:
-        await cleanup_browser_session()
-
-
-def main():
-    asyncio.run(run_task())
-
-
-if __name__ == "__main__":
-    main()