long_articles_job.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. import time
  2. from argparse import ArgumentParser
  3. from cold_start.crawler.baidu import BaiduVideoCrawler
  4. from tasks.ai_tasks import run_title_similarity_task
  5. from tasks.safety_tasks import GetOffVideos
  6. from tasks.crawler_tasks.crawler_video.crawler_piaoquan_videos import (
  7. CrawlerPiaoQuanVideos,
  8. )
  9. from tasks.crawler_tasks.crawler_video.crawler_toutiao_videos import (
  10. CrawlerToutiaoAccountVideos,
  11. )
  12. from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import (
  13. CrawlerSohuRecommendVideos,
  14. CrawlerSohuHotVideos,
  15. )
  16. from tasks.crawler_tasks.crawler_video.crawler_sph_videos import (
  17. CrawlerChannelAccountVideos,
  18. )
  19. from tasks.crawler_tasks.crawler_video.crawler_gzh_videos import CrawlerGzhMetaVideos
  20. from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishRecordManager
  21. from tasks.data_tasks.fwh_data_recycle import SaveFwhDataToDatabase
  22. from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishMonitor
  23. from tasks.monitor_tasks.kimi_balance_monitor import check_kimi_balance
  24. from tasks.monitor_tasks.outside_server_accounts_monitor import (
  25. run_outside_server_accounts_monitor,
  26. )
  27. from tasks.publish_tasks.top_article_generalize import (
  28. TopArticleGeneralizeFromArticlePool,
  29. )
  30. class CrawlerTasks:
  31. @classmethod
  32. def run_piaoquan_video_crawler(cls):
  33. crawler = CrawlerPiaoQuanVideos()
  34. crawler.deal()
  35. @classmethod
  36. def run_sohu_video_crawler(cls):
  37. # step1, crawl sohu hot videos
  38. crawler_sohu_hot_videos = CrawlerSohuHotVideos()
  39. crawler_sohu_hot_videos.deal()
  40. # step2, crawl sohu recommend videos
  41. crawler_sohu_recommend_videos = CrawlerSohuRecommendVideos()
  42. crawler_sohu_recommend_videos.deal()
  43. @classmethod
  44. def run_sph_video_crawler(cls):
  45. crawler_channel_account_videos = CrawlerChannelAccountVideos()
  46. crawler_channel_account_videos.deal()
  47. @classmethod
  48. def crawler_gzh_meta_videos(cls):
  49. task = CrawlerGzhMetaVideos()
  50. task.deal()
  51. @classmethod
  52. def run_toutiao_video_crawler(cls):
  53. crawler = CrawlerToutiaoAccountVideos()
  54. crawler.deal()
  55. @classmethod
  56. def run_baidu_video_crawler(cls):
  57. task = BaiduVideoCrawler()
  58. task.deal()
  59. def run_fwh_data_manager():
  60. # 1. 从 aigc 获取数据
  61. fwh_group_publish_record_manager = FwhGroupPublishRecordManager()
  62. fwh_group_publish_record_manager.deal()
  63. # 2. 监测报警
  64. fwh_group_publish_monitor = FwhGroupPublishMonitor()
  65. fwh_group_publish_monitor.deal()
  66. # 3. 保存数据到数据库
  67. save_fwh_data_to_database = SaveFwhDataToDatabase()
  68. save_fwh_data_to_database.deal()
  69. def run_top_article_generalize_from_article_pool():
  70. task = TopArticleGeneralizeFromArticlePool()
  71. task.deal()
  72. def run_get_off_videos():
  73. GetOffVideos().deal()
  74. def main():
  75. """
  76. run long_articles_job
  77. """
  78. crawler = CrawlerTasks()
  79. parser = ArgumentParser()
  80. parser.add_argument("--task_name", help="which task you want to run")
  81. parser.add_argument("--run_date", help="task specify run date")
  82. args = parser.parse_args()
  83. task_name = args.task_name
  84. if task_name is None:
  85. print("task_name cannot be None")
  86. return
  87. else:
  88. match task_name:
  89. case "run_piaoquan_video_crawler":
  90. crawler.run_piaoquan_video_crawler()
  91. case "run_sohu_video_crawler":
  92. crawler.run_sohu_video_crawler()
  93. case "run_sph_video_crawler":
  94. crawler.run_sph_video_crawler()
  95. case "crawler_gzh_meta_videos":
  96. crawler.crawler_gzh_meta_videos()
  97. case "run_toutiao_video_crawler":
  98. crawler.run_toutiao_video_crawler()
  99. case "run_baidu_video_crawler":
  100. crawler.run_baidu_video_crawler()
  101. case "run_check_kimi_balance":
  102. check_kimi_balance()
  103. case "run_fwh_data_manager":
  104. run_fwh_data_manager()
  105. case "run_title_similarity_task":
  106. run_title_similarity_task()
  107. case "top_article_generalize":
  108. run_top_article_generalize_from_article_pool()
  109. case "run_get_off_videos":
  110. run_get_off_videos()
  111. case "run_outside_server_accounts_monitor":
  112. run_outside_server_accounts_monitor()
  113. case _:
  114. print("task_name cannot be None")
  115. if __name__ == "__main__":
  116. main()