long_articles_job.py 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. from argparse import ArgumentParser
  2. from tasks.crawler_tasks.crawler_video.crawler_piaoquan_videos import (
  3. CrawlerPiaoQuanVideos,
  4. )
  5. from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import CrawlerSohuHotVideos
  6. from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import (
  7. CrawlerSohuRecommendVideos,
  8. )
  9. from tasks.crawler_tasks.crawler_video.crawler_sph_videos import (
  10. CrawlerChannelAccountVideos,
  11. )
  12. from tasks.crawler_tasks.crawler_video.crawler_gzh_videos import CrawlerGzhMetaVideos
  13. from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishRecordManager
  14. from tasks.data_tasks.fwh_data_recycle import SaveFwhDataToDatabase
  15. from tasks.monitor_tasks.kimi_balance_monitor import check_kimi_balance
  16. from tasks.publish_tasks.top_article_generalize import (
  17. TopArticleGeneralizeFromArticlePool,
  18. )
  19. def run_piaoquan_video_crawler():
  20. crawler = CrawlerPiaoQuanVideos()
  21. crawler.deal()
  22. def run_sohu_video_crawler():
  23. # step1, crawl sohu hot videos
  24. crawler_sohu_hot_videos = CrawlerSohuHotVideos()
  25. crawler_sohu_hot_videos.deal()
  26. # step2, crawl sohu recommend videos
  27. crawler_sohu_recommend_videos = CrawlerSohuRecommendVideos()
  28. crawler_sohu_recommend_videos.deal()
  29. def run_sph_video_crawler():
  30. crawler_channel_account_videos = CrawlerChannelAccountVideos()
  31. crawler_channel_account_videos.deal()
  32. def run_fwh_data_manager():
  33. fwh_group_publish_record_manager = FwhGroupPublishRecordManager()
  34. fwh_group_publish_record_manager.deal()
  35. # 2. 保存数据到数据库
  36. save_fwh_data_to_database = SaveFwhDataToDatabase()
  37. save_fwh_data_to_database.deal()
  38. def run_top_article_generalize_from_article_pool():
  39. task = TopArticleGeneralizeFromArticlePool()
  40. task.deal()
  41. def crawler_gzh_meta_videos():
  42. task = CrawlerGzhMetaVideos()
  43. task.deal()
  44. def main():
  45. """
  46. run long_articles_job
  47. """
  48. parser = ArgumentParser()
  49. parser.add_argument("--task_name", help="which task you want to run")
  50. parser.add_argument("--run_date", help="task specify run date")
  51. args = parser.parse_args()
  52. task_name = args.task_name
  53. if task_name is None:
  54. print("task_name cannot be None")
  55. return
  56. else:
  57. match task_name:
  58. case "run_piaoquan_video_crawler":
  59. run_piaoquan_video_crawler()
  60. case "run_sohu_video_crawler":
  61. run_sohu_video_crawler()
  62. case "run_check_kimi_balance":
  63. check_kimi_balance()
  64. case "run_fwh_data_manager":
  65. run_fwh_data_manager()
  66. case "run_sph_video_crawler":
  67. run_sph_video_crawler()
  68. case "top_article_generalize":
  69. run_top_article_generalize_from_article_pool()
  70. case "crawler_gzh_meta_videos":
  71. crawler_gzh_meta_videos()
  72. case _:
  73. print("task_name cannot be None")
  74. if __name__ == "__main__":
  75. main()