# spiders_config.yaml
---
  # Shared settings. Presumably applied to every spider entry in this file
  # unless the entry overrides them -- confirm against the config loader.
  default:
    # NOTE(review): plain-HTTP endpoint addressed by a raw IP; confirm this is
    # the intended target for the environment this file is deployed to.
    base_url: "http://8.217.192.46:8889"
    # Units are not stated in this file (presumably seconds -- TODO confirm).
    request_timeout: 30
    # Block style instead of an inline JSON object on a continuation line;
    # the resulting mapping is identical.
    headers:
      Content-Type: "application/json"
  # Spider entry for platform "benshanzhufu", mode "recommend".
  # NOTE(review): the nesting below was reconstructed from the key names after
  # the original indentation was lost -- confirm against the consumer's schema.
  benshanzhufurecommend:
    platform: benshanzhufu
    mode: recommend
    path: /crawler/ben_shan_zhu_fu/recommend
    method: post
    request_body:
      # Template placeholder; reads like "next_cursor, falling back to '1'".
      # Verify the template syntax against the loader.
      cursor: "{{next_cursor|'1'}}"
    loop_times: 200
    # Units are not stated in this file (presumably seconds -- TODO confirm).
    loop_interval: 5
    feishu_sheetid: "aTSJH4"
    response_parse:
      # Values starting with "$." look like JSONPath selectors evaluated
      # against the response -- confirm with the parser implementation.
      next_cursor: "$.data.next_cursor"
      data_path: "$.data.data"
      fields:
        # NOTE(review): video_id, out_user_id and out_video_id all read the
        # same "$.nid" value; confirm out_user_id is meant to be the video id.
        video_id: "$.nid"
        video_title: "$.title"
        # Literal 0, not a selector: the count is fixed rather than extracted.
        play_cnt: 0
        publish_time_stamp: "$.update_time"
        out_user_id: "$.nid"
        cover_url: "$.video_cover"
        # Literal 0, not a selector.
        like_cnt: 0
        video_url: "$.video_url"
        out_video_id: "$.nid"
  # Spider entry for platform "xiaoniangaotuijianliu", mode "recommend".
  # NOTE(review): the nesting below was reconstructed from the key names after
  # the original indentation was lost -- confirm against the consumer's schema.
  xngtjl_recommend_prod:
    platform: xiaoniangaotuijianliu
    mode: recommend
    # NOTE(review): this is the same path the benshanzhufurecommend entry
    # uses; it looks copied from there -- confirm it is correct for this
    # platform.
    path: /crawler/ben_shan_zhu_fu/recommend
    method: post
    request_body:
      # Quoted so the cursor stays a string rather than the integer 1.
      cursor: "1"
    loop_times: 2
    etl_hook: "process_video_obj"
    response_parse:
      # Values starting with "$." look like JSONPath selectors evaluated
      # against the response -- confirm with the parser implementation.
      next_cursor: "$.data.next_cursor"
      data_path: "$.data.data"
      fields:
        # NOTE(review): video_id, out_user_id and out_video_id all read the
        # same "$.nid" value; confirm out_user_id is meant to be the video id.
        video_id: "$.nid"
        video_title: "$.title"
        # Literal 0, not a selector: the count is fixed rather than extracted.
        play_cnt: 0
        publish_time_stamp: "$.update_time"
        out_user_id: "$.nid"
        cover_url: "$.video_cover"
        # Literal 0, not a selector.
        like_cnt: 0
        video_url: "$.video_url"
        out_video_id: "$.nid"
    post_actions:
      - trigger: after_video_processed
        # NOTE(review): example.com is a placeholder host; replace it before
        # relying on this notification.
        endpoint: "http://example.com/notify"
        # Was a bare "payload:" (which already parses as null); spelled out
        # so the empty value is visibly intentional. Use {} instead if the
        # consumer expects a mapping -- TODO confirm.
        payload: null
  # Spider entry "zhongqingkandian", mode "recommend".
  # NOTE(review): this entry uses "parse" and has no "platform"/"method" keys,
  # unlike the entries that use "response_parse"; confirm the loader accepts
  # both shapes.
  # NOTE(review): the nesting below was reconstructed from the key names after
  # the original indentation was lost. In particular, etl_hook and
  # custom_class are placed at entry level -- confirm against the consumer.
  zhongqingkandian:
    mode: recommend
    path: "/zqkd"
    paging: true
    max_pages: 5
    db_config:
      table: "zhongqingkandian"
    etl_hook: "process_video_obj"
    parse:
      # Values starting with "$." look like JSONPath selectors evaluated
      # against the response -- confirm with the parser implementation.
      data_path: "$.data[*]"
      fields:
        title: "$.title"
        vid: "$.id"
        cover: "$.cover"
        url: "$.video_url"
    # Dotted name; presumably an importable crawler class -- TODO confirm.
    custom_class: my_crawlers.ZhongqingKandianCrawler
  # Spider entry "fuqihaoyundao".
  # NOTE(review): this entry uses "url" where other entries use "path", and
  # an upper-case "POST" where others use lower-case "post"; confirm the
  # loader treats these the same way.
  # NOTE(review): the nesting below was reconstructed from the key names after
  # the original indentation was lost -- confirm against the consumer's schema.
  fuqihaoyundao:
    url: "/fuqi"
    method: "POST"
    paging: false
    retry_times: 2
    etl_hook: "process_video_obj"
    parse:
      # Values starting with "$." look like JSONPath selectors evaluated
      # against the response -- confirm with the parser implementation.
      data_path: "$.videos[*]"
      fields:
        id: "$.id"
        name: "$.name"
        mp4: "$.url"