long_articles_job.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. from argparse import ArgumentParser
  2. from cold_start.crawler.baidu import BaiduVideoCrawler
  3. from tasks.ai_tasks import run_title_similarity_task
  4. from tasks.crawler_tasks.crawler_video.crawler_piaoquan_videos import (
  5. CrawlerPiaoQuanVideos,
  6. )
  7. from tasks.crawler_tasks.crawler_video.crawler_toutiao_videos import (
  8. CrawlerToutiaoAccountVideos,
  9. )
  10. from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import (
  11. CrawlerSohuRecommendVideos,
  12. CrawlerSohuHotVideos,
  13. )
  14. from tasks.crawler_tasks.crawler_video.crawler_sph_videos import (
  15. CrawlerChannelAccountVideos,
  16. )
  17. from tasks.crawler_tasks.crawler_video.crawler_gzh_videos import CrawlerGzhMetaVideos
  18. from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishRecordManager
  19. from tasks.data_tasks.fwh_data_recycle import SaveFwhDataToDatabase
  20. from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishMonitor
  21. from tasks.monitor_tasks.kimi_balance_monitor import check_kimi_balance
  22. from tasks.publish_tasks.top_article_generalize import (
  23. TopArticleGeneralizeFromArticlePool,
  24. )
  25. class CrawlerTasks:
  26. @classmethod
  27. def run_piaoquan_video_crawler(cls):
  28. crawler = CrawlerPiaoQuanVideos()
  29. crawler.deal()
  30. @classmethod
  31. def run_sohu_video_crawler(cls):
  32. # step1, crawl sohu hot videos
  33. crawler_sohu_hot_videos = CrawlerSohuHotVideos()
  34. crawler_sohu_hot_videos.deal()
  35. # step2, crawl sohu recommend videos
  36. crawler_sohu_recommend_videos = CrawlerSohuRecommendVideos()
  37. crawler_sohu_recommend_videos.deal()
  38. @classmethod
  39. def run_sph_video_crawler(cls):
  40. crawler_channel_account_videos = CrawlerChannelAccountVideos()
  41. crawler_channel_account_videos.deal()
  42. @classmethod
  43. def crawler_gzh_meta_videos(cls):
  44. task = CrawlerGzhMetaVideos()
  45. task.deal()
  46. @classmethod
  47. def run_toutiao_video_crawler(cls):
  48. crawler = CrawlerToutiaoAccountVideos()
  49. crawler.deal()
  50. @classmethod
  51. def run_baidu_video_crawler(cls):
  52. task = BaiduVideoCrawler()
  53. task.deal()
  54. def run_fwh_data_manager():
  55. # 1. 从 aigc 获取数据
  56. fwh_group_publish_record_manager = FwhGroupPublishRecordManager()
  57. fwh_group_publish_record_manager.deal()
  58. # 2. 监测报警
  59. fwh_group_publish_monitor = FwhGroupPublishMonitor()
  60. fwh_group_publish_monitor.deal()
  61. # 3. 保存数据到数据库
  62. save_fwh_data_to_database = SaveFwhDataToDatabase()
  63. save_fwh_data_to_database.deal()
  64. def run_top_article_generalize_from_article_pool():
  65. task = TopArticleGeneralizeFromArticlePool()
  66. task.deal()
  67. def main():
  68. """
  69. run long_articles_job
  70. """
  71. crawler = CrawlerTasks()
  72. parser = ArgumentParser()
  73. parser.add_argument("--task_name", help="which task you want to run")
  74. parser.add_argument("--run_date", help="task specify run date")
  75. args = parser.parse_args()
  76. task_name = args.task_name
  77. if task_name is None:
  78. print("task_name cannot be None")
  79. return
  80. else:
  81. match task_name:
  82. case "run_piaoquan_video_crawler":
  83. crawler.run_piaoquan_video_crawler()
  84. case "run_sohu_video_crawler":
  85. crawler.run_sohu_video_crawler()
  86. case "run_sph_video_crawler":
  87. crawler.run_sph_video_crawler()
  88. case "crawler_gzh_meta_videos":
  89. crawler.crawler_gzh_meta_videos()
  90. case "run_toutiao_video_crawler":
  91. crawler.run_toutiao_video_crawler()
  92. case "run_baidu_video_crawler":
  93. crawler.run_baidu_video_crawler()
  94. case "run_check_kimi_balance":
  95. check_kimi_balance()
  96. case "run_fwh_data_manager":
  97. run_fwh_data_manager()
  98. case "run_title_similarity_task":
  99. run_title_similarity_task()
  100. case "top_article_generalize":
  101. run_top_article_generalize_from_article_pool()
  102. case _:
  103. print("task_name cannot be None")
# Dispatch only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()