# scratch_spider.py

import json
import os
import re
import urllib.parse
import urllib.request
from io import BytesIO

from PIL import Image
  8. url = 'https://comfyanonymous.github.io/ComfyUI_examples/sdxl/'
  9. try:
  10. req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
  11. with urllib.request.urlopen(req) as response:
  12. html = response.read().decode('utf-8')
  13. png_links = re.findall(r'src=["\']([^"\']+\.png)["\']', html)
  14. print('Found PNG links:', png_links)
  15. for link in png_links:
  16. full_url = urllib.parse.urljoin(url, link)
  17. print(f"Downloading {full_url}")
  18. req_img = urllib.request.Request(full_url, headers={'User-Agent': 'Mozilla/5.0'})
  19. with urllib.request.urlopen(req_img) as response_img:
  20. img_data = response_img.read()
  21. img = Image.open(BytesIO(img_data))
  22. if 'prompt' in img.info:
  23. api_json = json.loads(img.info['prompt'])
  24. filename = os.path.basename(link).replace('.png', '_api.json')
  25. # save to workspace root temporarily
  26. out_path = os.path.join("c:\\Users\\11304\\gitlab\\cybertogether\\tool_agent", filename)
  27. with open(out_path, 'w', encoding='utf-8') as f:
  28. json.dump(api_json, f, indent=2)
  29. print(f'-> Successfully saved API JSON with {len(api_json)} nodes to {filename}')
  30. else:
  31. print(f'-> No prompt chunk found in {link}.')
  32. except Exception as e:
  33. print('Error:', e)