# spiders_config.yaml
  # Shared settings.
  # NOTE(review): presumably applied to every spider entry in this file as
  # fallback values — confirm against the config loader.
  default:
    # NOTE(review): spider `path` values are presumably appended to this root.
    base_url: 'http://8.217.192.46:8889'
    # NOTE(review): unit is not stated here; presumably seconds.
    request_timeout: 30
    # Block-style mapping with a single entry (same data as the JSON form).
    headers:
      Content-Type: application/json
  # Spider entry: platform benshanzhufu, mode recommend.
  # NOTE(review): nesting was rebuilt from a flattened listing; confirm that
  # loop_times, loop_interval and feishu_sheetid sit at spider level rather
  # than inside request_body.
  benshanzhufurecommend:
    platform: benshanzhufu
    mode: recommend
    path: /crawler/ben_shan_zhu_fu/recommend
    method: post
    request_body:
      # Template placeholder; presumably filled from
      # response_parse.next_cursor between requests — TODO confirm.
      cursor: '{{next_cursor}}'
    loop_times: 200
    # NOTE(review): unit is not stated here; presumably seconds.
    loop_interval: 5
    feishu_sheetid: 'aTSJH4'
    response_parse:
      # Values starting with `$.` look like JSONPath expressions evaluated
      # against the response — verify against the parser.
      data: '$.data'
      next_cursor: '$.data.next_cursor'
      data_path: '$.data.data'
      fields:
        video_id: '$.nid'
        video_title: '$.title'
        # Literal integer 0, not a path expression.
        play_cnt: 0
        publish_time_stamp: '$.update_time'
        # NOTE(review): same source path as video_id — confirm intended.
        out_user_id: '$.nid'
        cover_url: '$.video_cover'
        # Literal integer 0, not a path expression.
        like_cnt: 0
        video_url: '$.video_url'
        out_video_id: '$.nid'
  # Spider entry: platform xiaoniangaotuijianliu, mode recommend.
  # NOTE(review): nesting was rebuilt from a flattened listing; confirm that
  # loop_times, etl_hook and post_actions sit at spider level.
  xngtjl_recommend_prod:
    platform: xiaoniangaotuijianliu
    mode: recommend
    # NOTE(review): this path targets the ben_shan_zhu_fu endpoint although
    # platform is xiaoniangaotuijianliu — confirm it is not a copy-paste
    # leftover before relying on this entry.
    path: /crawler/ben_shan_zhu_fu/recommend
    method: post
    request_body:
      # Fixed string "1" (quoted, so it stays a string rather than an int).
      cursor: '1'
    loop_times: 2
    etl_hook: 'process_video_obj'
    response_parse:
      # Values starting with `$.` look like JSONPath expressions evaluated
      # against the response — verify against the parser.
      next_cursor: '$.data.next_cursor'
      data_path: '$.data.data'
      fields:
        video_id: '$.nid'
        video_title: '$.title'
        # Literal integer 0, not a path expression.
        play_cnt: 0
        publish_time_stamp: '$.update_time'
        # NOTE(review): same source path as video_id — confirm intended.
        out_user_id: '$.nid'
        cover_url: '$.video_cover'
        # Literal integer 0, not a path expression.
        like_cnt: 0
        video_url: '$.video_url'
        out_video_id: '$.nid'
    post_actions:
      - trigger: after_video_processed
        # NOTE(review): example.com is a reserved documentation domain;
        # this looks like a placeholder endpoint — confirm before production.
        endpoint: 'http://example.com/notify'
        # Explicit null: the bare `payload:` it replaces parsed to the same
        # value, but the intent is now visible. Switch to `{}` if the
        # consumer expects a mapping here.
        payload: null
  # Spider entry: zhongqingkandian, mode recommend.
  # NOTE(review): nesting was rebuilt from a flattened listing; etl_hook and
  # custom_class are placed at spider level (etl_hook by analogy with the
  # other entries) — confirm against the loader.
  zhongqingkandian:
    mode: recommend
    path: '/zqkd'
    # Pagination is switched on and capped by max_pages.
    paging: true
    max_pages: 5
    db_config:
      table: 'zhongqingkandian'
    etl_hook: 'process_video_obj'
    parse:
      # `[*]` selects every element of the `data` array (JSONPath wildcard,
      # assuming a JSONPath evaluator is used).
      data_path: '$.data[*]'
      fields:
        title: '$.title'
        vid: '$.id'
        cover: '$.cover'
        url: '$.video_url'
    # Dotted name; presumably an importable crawler class — TODO confirm.
    custom_class: my_crawlers.ZhongqingKandianCrawler
  # Spider entry: fuqihaoyundao.
  # NOTE(review): this entry uses `url` where other entries use `path`, and
  # writes the method as upper-case "POST" where others use lower-case
  # `post` — confirm the loader accepts both spellings.
  fuqihaoyundao:
    url: '/fuqi'
    method: 'POST'
    paging: false
    retry_times: 2
    etl_hook: 'process_video_obj'
    parse:
      # `[*]` selects every element of the `videos` array (JSONPath
      # wildcard, assuming a JSONPath evaluator is used).
      data_path: '$.videos[*]'
      fields:
        id: '$.id'
        name: '$.name'
        mp4: '$.url'