test_skill.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. import json
  2. import subprocess
  3. import time
  4. from pathlib import Path
  5. def run_cli(session: str, args: list[str]) -> dict:
  6. command = ["browser-use", "--session", session, "--json"] + args
  7. result = subprocess.run(command, capture_output=True, text=True)
  8. if result.returncode != 0:
  9. raise RuntimeError(result.stderr.strip() or "browser-use command failed")
  10. payload = result.stdout.strip()
  11. if not payload:
  12. raise RuntimeError("browser-use returned empty output")
  13. data = json.loads(payload)
  14. if not data.get("success", False):
  15. raise RuntimeError(data.get("error", "browser-use command error"))
  16. return data.get("data", {})
  17. def stop_session_server(session: str) -> None:
  18. subprocess.run(
  19. ["browser-use", "--session", session, "server", "stop"],
  20. capture_output=True,
  21. text=True,
  22. )
  23. def main():
  24. project_root = Path(__file__).resolve().parents[1]
  25. output_dir = project_root / "output"
  26. output_dir.mkdir(parents=True, exist_ok=True)
  27. json_file = output_dir / "skill_baidu.json"
  28. html_file = output_dir / "skill_baidu_page.html"
  29. session = "skill_baidu"
  30. keyword = "瑜伽美女"
  31. try:
  32. stop_session_server(session)
  33. try:
  34. run_cli(session, ["open", "https://www.baidu.com"])
  35. except RuntimeError:
  36. stop_session_server(session)
  37. run_cli(session, ["open", "https://www.baidu.com"])
  38. search_js = (
  39. "(function(){"
  40. "const input=document.querySelector('#kw');"
  41. "const btn=document.querySelector('#su');"
  42. "if(input){input.value='" + keyword + "';}"
  43. "if(btn){btn.click();}"
  44. "else if(input&&input.form){input.form.submit();}"
  45. "return {hasInput:!!input,hasButton:!!btn};"
  46. "})()"
  47. )
  48. run_cli(session, ["eval", search_js])
  49. wait_js = (
  50. "(function(){"
  51. "const items=document.querySelectorAll('#content_left .result, #content_left .c-container, #content_left .result-op');"
  52. "const bodyReady=!!document.body;"
  53. "const bodyLen=bodyReady?(document.body.innerText||'').length:0;"
  54. "return {count:items.length, bodyReady:bodyReady, bodyLen:bodyLen};"
  55. "})()"
  56. )
  57. count = 0
  58. for _ in range(12):
  59. data = run_cli(session, ["eval", wait_js])
  60. result = data.get("result") if isinstance(data, dict) else {}
  61. count = int(result.get("count") or 0)
  62. body_len = int(result.get("bodyLen") or 0)
  63. if count >= 3 or body_len > 1000:
  64. break
  65. time.sleep(1)
  66. extract_js = (
  67. "(function(){"
  68. "const items=Array.from(document.querySelectorAll('#content_left .result, #content_left .c-container, #content_left .result-op'));"
  69. "const results=[];"
  70. "for(const item of items){"
  71. "const a=item.querySelector('h3 a')||item.querySelector('a[data-click]')||item.querySelector('a');"
  72. "if(!a) continue;"
  73. "const title=(a.textContent||'').trim();"
  74. "const link=a.href||'';"
  75. "const summaryEl=item.querySelector('.c-abstract, .content-right_8Zs40, .content-right_8Zs40_2gVt2');"
  76. "const summary=(summaryEl?summaryEl.textContent:'').trim();"
  77. "results.push({index:results.length+1,title,link,summary});"
  78. "if(results.length>=10) break;"
  79. "}"
  80. "return {success:true,keyword:'" + keyword + "',count:results.length,timestamp:new Date().toISOString(),results:results};"
  81. "})()"
  82. )
  83. data = run_cli(session, ["eval", extract_js])
  84. extracted = data.get("result") if isinstance(data, dict) else data
  85. if not extracted:
  86. extracted = {
  87. "success": False,
  88. "keyword": keyword,
  89. "count": 0,
  90. "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
  91. "results": [],
  92. }
  93. with open(json_file, "w", encoding="utf-8") as f:
  94. json.dump(extracted, f, ensure_ascii=False, indent=2)
  95. html_data = run_cli(session, ["eval", "document.documentElement.outerHTML"])
  96. html_content = html_data.get("result") if isinstance(html_data, dict) else html_data
  97. with open(html_file, "w", encoding="utf-8") as f:
  98. f.write(html_content or "")
  99. print(f"✅ 数据已保存到: {json_file}")
  100. print(f"✅ HTML 已保存到: {html_file}")
  101. finally:
  102. try:
  103. run_cli(session, ["close"])
  104. except Exception:
  105. pass
  106. if __name__ == "__main__":
  107. main()