# spiders_config.yaml (2.1 KB)
  # Presumably the fallback settings applied to the spider entries in this
  # file — verify against the loader.
  # NOTE(review): nesting was rebuilt from a flattened listing whose
  # indentation was lost; confirm the structure before relying on it.
  default:
    base_url: "http://8.217.192.46:8889"
    # Unit is not stated here; presumably seconds — TODO confirm.
    request_timeout: 30
    max_retries: 3
    headers:
      Content-Type: application/json
  # Spider entry: platform "benshanzhufu", mode "recommend".
  # NOTE(review): nesting was rebuilt from a flattened listing whose
  # indentation was lost; confirm the structure against the loader.
  benshanzhufurecommend:
    platform: benshanzhufu
    mode: recommend
    path: /crawler/ben_shan_zhu_fu/recommend
    method: post
    request_body:
      # Template placeholder; presumably filled with the next_cursor value
      # parsed from the previous response — TODO confirm.
      cursor: "{{next_cursor}}"
    loop_times: 50
    # Unit is not stated here; presumably seconds between loops — confirm.
    loop_interval:
      min: 30
      max: 60
    feishu_sheetid: "aTSJH4"
    # Values below look like JSONPath-style expressions over the response.
    response_parse:
      data: "$.data"
      next_cursor: "$.data.next_cursor"
      data_path: "$.data.data"
      fields:
        video_id: "$.nid"
        video_title: "$.title"
        # Literal 0 rather than a path expression.
        play_cnt: 0
        publish_time_stamp: "$.update_time"
        # NOTE(review): same path as video_id / out_video_id — confirm that
        # the user id is really meant to come from "$.nid".
        out_user_id: "$.nid"
        cover_url: "$.video_cover"
        # Literal 0 rather than a path expression.
        like_cnt: 0
        video_url: "$.video_url"
        out_video_id: "$.nid"
  # Spider entry: platform "yuannifuqimanman", mode "recommend".
  # NOTE(review): nesting was rebuilt from a flattened listing whose
  # indentation was lost; confirm the structure against the loader.
  yuannifuqimanmanrecommend:
    platform: yuannifuqimanman
    mode: recommend
    path: /crawler/yuan_ni_fu_qi_man_man/recommend
    method: post
    request_body:
      # Template placeholder; presumably filled with the next_cursor value
      # parsed from the previous response — TODO confirm.
      cursor: "{{next_cursor}}"
    loop_times: 100
    # Unit is not stated here; presumably seconds between loops — confirm.
    loop_interval:
      min: 30
      max: 60
    feishu_sheetid: "golXy9"
    # Values below look like JSONPath-style expressions over the response.
    response_parse:
      data: "$.data"
      next_cursor: "$.data.next_cursor"
      data_path: "$.data.data"
      fields:
        video_id: "$.nid"
        video_title: "$.title"
        # NOTE(review): same path as video_id / out_video_id — confirm that
        # the user id is really meant to come from "$.nid".
        out_user_id: "$.nid"
        cover_url: "$.video_cover"
        video_url: "$.video_url"
        out_video_id: "$.nid"
  # Spider entry: platform "xiaoniangao", mode "author".
  # NOTE(review): nesting was rebuilt from a flattened listing whose
  # indentation was lost; confirm the structure against the loader.
  xiaoniangaoauthor:
    platform: xiaoniangao
    mode: author
    path: /crawler/xiao_nian_gao_plus/blogger
    method: post
    request_body:
      # Template placeholders; presumably substituted per request — confirm.
      cursor: "{{next_cursor}}"
      account_id: "{{uid}}"  # uid from the database
    loop_times: 100
    # Unit is not stated here; presumably seconds between loops — confirm.
    loop_interval:
      min: 5
      max: 20
    feishu_sheetid: "K0gA9Y"
    # Values below look like JSONPath-style expressions over the response.
    response_parse:
      uid: "$.uid"  # uid from the database
      # NOTE(review): read from "$.cursor", whereas the other entries in this
      # file read "$.data.next_cursor" — confirm this is intentional.
      next_cursor: "$.cursor"
      data: "$.data"
      has_more: "$.data.has_more"
      data_path: "$.data.data"
      fields:
        video_title: "$.title"
        duration: "$.du"
        play_cnt: "$.play_pv"
        like_cnt: "$.favor.total"
        comment_cnt: "$.comment_count"
        share_cnt: "$.share"
        width: "$.w"
        height: "$.h"
        avatar_url: "$.user.hurl"
        cover_url: "$.url"
        video_url: "$.v_url"
        out_user_id: "$.user.mid"
        out_video_id: "$.vid"
        publish_time_stamp: "$.t"