# spiders_config.yaml
  1. default:
  2. base_url: http://8.217.192.46:8889
  3. request_timeout: 30
  4. headers:
  5. {"Content-Type": "application/json"}
  6. benshanzhufurecommend:
  7. platform: benshanzhufu
  8. mode: recommend
  9. path: /crawler/ben_shan_zhu_fu/recommend
  10. method: post
  11. request_body:
  12. cursor: "1"
  13. loop_times: 2
  14. loop_interval: 5
  15. response_parse:
  16. next_cursor: "$.data.next_cursor"
  17. data_path: "$.data.data"
  18. fields:
  19. video_id: "$.nid"
  20. video_title: "$.title"
  21. play_cnt: 0
  22. publish_time_stamp: "$.update_time"
  23. out_user_id: "$.nid"
  24. cover_url: "$.video_cover"
  25. like_cnt: 0
  26. video_url: "$.video_url"
  27. out_video_id: "$.nid"
  28. xngtjl_recommend_prod:
  29. platform: xiaoniangaotuijianliu
  30. mode: recommend
  31. path: /crawler/ben_shan_zhu_fu/recommend
  32. method: post
  33. request_body:
  34. cursor: "1"
  35. loop_times: 2
  36. etl_hook: "process_video_obj"
  37. response_parse:
  38. next_cursor: "$.data.next_cursor"
  39. data_path: "$.data.data"
  40. fields:
  41. video_id: "$.nid"
  42. video_title: "$.title"
  43. play_cnt: 0
  44. publish_time_stamp: "$.update_time"
  45. out_user_id: "$.nid"
  46. cover_url: "$.video_cover"
  47. like_cnt: 0
  48. video_url: "$.video_url"
  49. out_video_id: "$.nid"
  50. post_actions:
  51. - trigger: after_video_processed
  52. endpoint: "http://example.com/notify"
  53. payload:
  54. zhongqingkandian:
  55. mode: recommend
  56. path: "/zqkd"
  57. paging: true
  58. max_pages: 5
  59. db_config:
  60. table: "zhongqingkandian"
  61. etl_hook: "process_video_obj"
  62. parse:
  63. data_path: "$.data[*]"
  64. fields:
  65. title: "$.title"
  66. vid: "$.id"
  67. cover: "$.cover"
  68. url: "$.video_url"
  69. custom_class: my_crawlers.ZhongqingKandianCrawler
  70. fuqihaoyundao:
  71. url: "/fuqi"
  72. method: "POST"
  73. paging: false
  74. retry_times: 2
  75. etl_hook: "process_video_obj"
  76. parse:
  77. data_path: "$.videos[*]"
  78. fields:
  79. id: "$.id"
  80. name: "$.name"
  81. mp4: "$.url"