long_articles_job.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. from argparse import ArgumentParser
  2. from cold_start.crawler.baidu import BaiduVideoCrawler
  3. from tasks.ai_tasks import run_title_similarity_task
  4. from tasks.crawler_tasks.crawler_video.crawler_piaoquan_videos import (
  5. CrawlerPiaoQuanVideos,
  6. )
  7. from tasks.crawler_tasks.crawler_video.crawler_toutiao_videos import (
  8. CrawlerToutiaoAccountVideos,
  9. )
  10. from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import (
  11. CrawlerSohuRecommendVideos,
  12. CrawlerSohuHotVideos,
  13. )
  14. from tasks.crawler_tasks.crawler_video.crawler_sph_videos import (
  15. CrawlerChannelAccountVideos,
  16. )
  17. from tasks.crawler_tasks.crawler_video.crawler_gzh_videos import CrawlerGzhMetaVideos
  18. from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishRecordManager
  19. from tasks.data_tasks.fwh_data_recycle import SaveFwhDataToDatabase
  20. from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishMonitor
  21. from tasks.monitor_tasks.kimi_balance_monitor import check_kimi_balance
  22. from tasks.monitor_tasks.outside_server_accounts_monitor import run_outside_server_accounts_monitor
  23. from tasks.publish_tasks.top_article_generalize import (
  24. TopArticleGeneralizeFromArticlePool,
  25. )
  26. class CrawlerTasks:
  27. @classmethod
  28. def run_piaoquan_video_crawler(cls):
  29. crawler = CrawlerPiaoQuanVideos()
  30. crawler.deal()
  31. @classmethod
  32. def run_sohu_video_crawler(cls):
  33. # step1, crawl sohu hot videos
  34. crawler_sohu_hot_videos = CrawlerSohuHotVideos()
  35. crawler_sohu_hot_videos.deal()
  36. # step2, crawl sohu recommend videos
  37. crawler_sohu_recommend_videos = CrawlerSohuRecommendVideos()
  38. crawler_sohu_recommend_videos.deal()
  39. @classmethod
  40. def run_sph_video_crawler(cls):
  41. crawler_channel_account_videos = CrawlerChannelAccountVideos()
  42. crawler_channel_account_videos.deal()
  43. @classmethod
  44. def crawler_gzh_meta_videos(cls):
  45. task = CrawlerGzhMetaVideos()
  46. task.deal()
  47. @classmethod
  48. def run_toutiao_video_crawler(cls):
  49. crawler = CrawlerToutiaoAccountVideos()
  50. crawler.deal()
  51. @classmethod
  52. def run_baidu_video_crawler(cls):
  53. task = BaiduVideoCrawler()
  54. task.deal()
  55. def run_fwh_data_manager():
  56. # 1. 从 aigc 获取数据
  57. fwh_group_publish_record_manager = FwhGroupPublishRecordManager()
  58. fwh_group_publish_record_manager.deal()
  59. # 2. 监测报警
  60. fwh_group_publish_monitor = FwhGroupPublishMonitor()
  61. fwh_group_publish_monitor.deal()
  62. # 3. 保存数据到数据库
  63. save_fwh_data_to_database = SaveFwhDataToDatabase()
  64. save_fwh_data_to_database.deal()
  65. def run_top_article_generalize_from_article_pool():
  66. task = TopArticleGeneralizeFromArticlePool()
  67. task.deal()
  68. def main():
  69. """
  70. run long_articles_job
  71. """
  72. crawler = CrawlerTasks()
  73. parser = ArgumentParser()
  74. parser.add_argument("--task_name", help="which task you want to run")
  75. parser.add_argument("--run_date", help="task specify run date")
  76. args = parser.parse_args()
  77. task_name = args.task_name
  78. if task_name is None:
  79. print("task_name cannot be None")
  80. return
  81. else:
  82. match task_name:
  83. case "run_piaoquan_video_crawler":
  84. crawler.run_piaoquan_video_crawler()
  85. case "run_sohu_video_crawler":
  86. crawler.run_sohu_video_crawler()
  87. case "run_sph_video_crawler":
  88. crawler.run_sph_video_crawler()
  89. case "crawler_gzh_meta_videos":
  90. crawler.crawler_gzh_meta_videos()
  91. case "run_toutiao_video_crawler":
  92. crawler.run_toutiao_video_crawler()
  93. case "run_baidu_video_crawler":
  94. crawler.run_baidu_video_crawler()
  95. case "run_check_kimi_balance":
  96. check_kimi_balance()
  97. case "run_fwh_data_manager":
  98. run_fwh_data_manager()
  99. case "run_title_similarity_task":
  100. run_title_similarity_task()
  101. case "top_article_generalize":
  102. run_top_article_generalize_from_article_pool()
  103. case "run_outside_server_accounts_monitor":
  104. run_outside_server_accounts_monitor()
  105. case _:
  106. print("task_name cannot be None")
  107. if __name__ == "__main__":
  108. main()