# spiders_config.yaml

  # Settings under the `default` key; presumably applied to every spider
  # unless the spider overrides them -- TODO confirm against the loader.
  default:
    # NOTE(review): plain-HTTP endpoint on a hard-coded IP address; confirm
    # this is intended and not something that should come from the environment.
    base_url: http://8.217.192.46:8889
    # Presumably seconds -- TODO confirm the unit with the HTTP client in use.
    request_timeout: 30
    # Block-style mapping; parses to the same value as the former inline
    # {"Content-Type": "application/json"} form.
    headers:
      Content-Type: "application/json"
  # Spider definition for `benshanzhufu`.
  # NOTE(review): nesting was reconstructed from a flattened listing. `paging`,
  # `max_pages` and `etl_hook` are assumed to be spider-level keys (not part of
  # `request_body`), and `fields` is assumed to live under `response_parse` --
  # verify against the code that consumes this file.
  benshanzhufu:
    mode: recommend
    path: /crawler/ben_shan_zhu_fu/recommend
    # NOTE(review): lowercase here, while `fuqihaoyundao` uses "POST"; confirm
    # the consumer treats the method case-insensitively.
    method: post
    request_body:
      # Quoted so the cursor stays a string rather than the integer 1.
      cursor: "1"
    paging: true
    max_pages: 5
    etl_hook: "process_video_obj"
    # NOTE(review): the other spiders name this section `parse`; confirm both
    # key names are recognised by the consumer.
    response_parse:
      next_cursor: "$.data.next_cursor"
      data_path: "$.data.data"
      fields:
        video_id: "$.nid"
        video_title: "$.title"
        # Literal integer rather than a JSONPath expression; presumably a
        # constant default -- TODO confirm.
        play_cnt: 0
        publish_time_stamp: "$.update_time"
        # NOTE(review): maps to the same "$.nid" as video_id and out_video_id;
        # confirm a user id is really meant to come from the item id.
        out_user_id: "$.nid"
        cover_url: "$.video_cover"
        # Literal integer, same pattern as play_cnt above.
        like_cnt: 0
        video_url: "$.video_url"
        out_video_id: "$.nid"
  # Spider definition for `zhongqingkandian`.
  # NOTE(review): nesting was reconstructed from a flattened listing. `table`
  # is assumed to be the only key under `db_config`, and `etl_hook` and
  # `custom_class` are assumed to be spider-level keys -- verify against the
  # code that consumes this file.
  zhongqingkandian:
    mode: recommend
    path: "/zqkd"
    # NOTE(review): no `method` key is given for this spider; whatever the
    # consumer falls back to applies -- TODO confirm.
    paging: true
    max_pages: 5
    db_config:
      table: "zhongqingkandian"
    etl_hook: "process_video_obj"
    parse:
      data_path: "$.data[*]"
      fields:
        title: "$.title"
        vid: "$.id"
        cover: "$.cover"
        url: "$.video_url"
    # Dotted name, presumably a module path plus class name resolved at
    # runtime -- TODO confirm how the consumer imports it.
    custom_class: my_crawlers.ZhongqingKandianCrawler
  # Spider definition for `fuqihaoyundao`.
  # NOTE(review): nesting was reconstructed from a flattened listing; `fields`
  # is assumed to live under `parse` -- verify against the consuming code.
  fuqihaoyundao:
    # NOTE(review): the other spiders use the key `path` for the endpoint;
    # confirm that `url` is also recognised by the consumer.
    url: "/fuqi"
    method: "POST"
    paging: false
    retry_times: 2
    etl_hook: "process_video_obj"
    parse:
      data_path: "$.videos[*]"
      fields:
        id: "$.id"
        name: "$.name"
        mp4: "$.url"