"""long_articles_job.py — CLI entry point dispatching long-articles scheduled tasks."""
from argparse import ArgumentParser

from tasks.crawler_tasks.crawler_video.crawler_gzh_videos import CrawlerGzhMetaVideos
from tasks.crawler_tasks.crawler_video.crawler_piaoquan_videos import (
    CrawlerPiaoQuanVideos,
)
from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import (
    CrawlerSohuHotVideos,
    CrawlerSohuRecommendVideos,
)
from tasks.crawler_tasks.crawler_video.crawler_sph_videos import (
    CrawlerChannelAccountVideos,
)
from tasks.data_tasks.fwh_data_recycle import (
    FwhGroupPublishMonitor,
    FwhGroupPublishRecordManager,
    SaveFwhDataToDatabase,
)
from tasks.monitor_tasks.kimi_balance_monitor import check_kimi_balance
from tasks.publish_tasks.top_article_generalize import (
    TopArticleGeneralizeFromArticlePool,
)
  20. def run_piaoquan_video_crawler():
  21. crawler = CrawlerPiaoQuanVideos()
  22. crawler.deal()
  23. def run_sohu_video_crawler():
  24. # step1, crawl sohu hot videos
  25. crawler_sohu_hot_videos = CrawlerSohuHotVideos()
  26. crawler_sohu_hot_videos.deal()
  27. # step2, crawl sohu recommend videos
  28. crawler_sohu_recommend_videos = CrawlerSohuRecommendVideos()
  29. crawler_sohu_recommend_videos.deal()
  30. def run_sph_video_crawler():
  31. crawler_channel_account_videos = CrawlerChannelAccountVideos()
  32. crawler_channel_account_videos.deal()
  33. def run_fwh_data_manager():
  34. # 1. 从 aigc 获取数据
  35. fwh_group_publish_record_manager = FwhGroupPublishRecordManager()
  36. fwh_group_publish_record_manager.deal()
  37. # 2. 监测报警
  38. fwh_group_publish_monitor = FwhGroupPublishMonitor()
  39. fwh_group_publish_monitor.deal()
  40. # 3. 保存数据到数据库
  41. save_fwh_data_to_database = SaveFwhDataToDatabase()
  42. save_fwh_data_to_database.deal()
  43. def run_top_article_generalize_from_article_pool():
  44. task = TopArticleGeneralizeFromArticlePool()
  45. task.deal()
  46. def crawler_gzh_meta_videos():
  47. task = CrawlerGzhMetaVideos()
  48. task.deal()
  49. def main():
  50. """
  51. run long_articles_job
  52. """
  53. parser = ArgumentParser()
  54. parser.add_argument("--task_name", help="which task you want to run")
  55. parser.add_argument("--run_date", help="task specify run date")
  56. args = parser.parse_args()
  57. task_name = args.task_name
  58. if task_name is None:
  59. print("task_name cannot be None")
  60. return
  61. else:
  62. match task_name:
  63. case "run_piaoquan_video_crawler":
  64. run_piaoquan_video_crawler()
  65. case "run_sohu_video_crawler":
  66. run_sohu_video_crawler()
  67. case "run_check_kimi_balance":
  68. check_kimi_balance()
  69. case "run_fwh_data_manager":
  70. run_fwh_data_manager()
  71. case "run_sph_video_crawler":
  72. run_sph_video_crawler()
  73. case "top_article_generalize":
  74. run_top_article_generalize_from_article_pool()
  75. case "crawler_gzh_meta_videos":
  76. crawler_gzh_meta_videos()
  77. case _:
  78. print("task_name cannot be None")
# Script entry point: dispatch only when executed directly, not on import.
if __name__ == "__main__":
    main()