123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154 |
- import time
- from argparse import ArgumentParser
- from cold_start.crawler.baidu import BaiduVideoCrawler
- from tasks.ai_tasks import run_title_similarity_task
- from tasks.safety_tasks import GetOffVideos
- from tasks.crawler_tasks.crawler_video.crawler_piaoquan_videos import (
- CrawlerPiaoQuanVideos,
- )
- from tasks.crawler_tasks.crawler_video.crawler_toutiao_videos import (
- CrawlerToutiaoAccountVideos,
- )
- from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import (
- CrawlerSohuRecommendVideos,
- CrawlerSohuHotVideos,
- )
- from tasks.crawler_tasks.crawler_video.crawler_sph_videos import (
- CrawlerChannelAccountVideos,
- )
- from tasks.crawler_tasks.crawler_video.crawler_gzh_videos import CrawlerGzhMetaVideos
- from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishRecordManager
- from tasks.data_tasks.fwh_data_recycle import SaveFwhDataToDatabase
- from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishMonitor
- from tasks.monitor_tasks.kimi_balance_monitor import check_kimi_balance
- from tasks.monitor_tasks.outside_server_accounts_monitor import (
- run_outside_server_accounts_monitor,
- )
- from tasks.publish_tasks.top_article_generalize import (
- TopArticleGeneralizeFromArticlePool,
- )
class CrawlerTasks:
    """Namespace of entry points that launch the individual video crawlers.

    Every entry point is a classmethod that instantiates the relevant
    crawler class and invokes its ``deal()`` method.
    """

    @classmethod
    def run_piaoquan_video_crawler(cls):
        """Instantiate CrawlerPiaoQuanVideos and call deal()."""
        CrawlerPiaoQuanVideos().deal()

    @classmethod
    def run_sohu_video_crawler(cls):
        """Run the Sohu hot-video crawler, then the recommend-video crawler."""
        # Order matters: hot videos first (step 1), recommended videos second
        # (step 2). Each crawler is built right before its own deal() call.
        for sohu_crawler_cls in (CrawlerSohuHotVideos, CrawlerSohuRecommendVideos):
            sohu_crawler_cls().deal()

    @classmethod
    def run_sph_video_crawler(cls):
        """Instantiate CrawlerChannelAccountVideos and call deal()."""
        CrawlerChannelAccountVideos().deal()

    @classmethod
    def crawler_gzh_meta_videos(cls):
        """Instantiate CrawlerGzhMetaVideos and call deal()."""
        CrawlerGzhMetaVideos().deal()

    @classmethod
    def run_toutiao_video_crawler(cls):
        """Instantiate CrawlerToutiaoAccountVideos and call deal()."""
        CrawlerToutiaoAccountVideos().deal()

    @classmethod
    def run_baidu_video_crawler(cls):
        """Instantiate BaiduVideoCrawler and call deal()."""
        BaiduVideoCrawler().deal()
def run_fwh_data_manager():
    """Run the three fwh data stages one after another.

    Each stage object is constructed immediately before its ``deal()`` call,
    and the stages run strictly in the order listed below.
    """
    stages = (
        FwhGroupPublishRecordManager,  # 1. fetch data from aigc
        FwhGroupPublishMonitor,  # 2. monitoring and alerting
        SaveFwhDataToDatabase,  # 3. save the data to the database
    )
    for stage_cls in stages:
        stage_cls().deal()
def run_top_article_generalize_from_article_pool():
    """Instantiate TopArticleGeneralizeFromArticlePool and call deal()."""
    TopArticleGeneralizeFromArticlePool().deal()
def run_get_off_videos():
    """Instantiate GetOffVideos and call deal()."""
    get_off_task = GetOffVideos()
    get_off_task.deal()
- def main():
- """
- run long_articles_job
- """
- crawler = CrawlerTasks()
- parser = ArgumentParser()
- parser.add_argument("--task_name", help="which task you want to run")
- parser.add_argument("--run_date", help="task specify run date")
- args = parser.parse_args()
- task_name = args.task_name
- if task_name is None:
- print("task_name cannot be None")
- return
- else:
- match task_name:
- case "run_piaoquan_video_crawler":
- crawler.run_piaoquan_video_crawler()
- case "run_sohu_video_crawler":
- crawler.run_sohu_video_crawler()
- case "run_sph_video_crawler":
- crawler.run_sph_video_crawler()
- case "crawler_gzh_meta_videos":
- crawler.crawler_gzh_meta_videos()
- case "run_toutiao_video_crawler":
- crawler.run_toutiao_video_crawler()
- case "run_baidu_video_crawler":
- crawler.run_baidu_video_crawler()
- case "run_check_kimi_balance":
- check_kimi_balance()
- case "run_fwh_data_manager":
- run_fwh_data_manager()
- case "run_title_similarity_task":
- run_title_similarity_task()
- case "top_article_generalize":
- run_top_article_generalize_from_article_pool()
- case "run_get_off_videos":
- run_get_off_videos()
- case "run_outside_server_accounts_monitor":
- run_outside_server_accounts_monitor()
- case _:
- print("task_name cannot be None")
# Script entry point: dispatch a task only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|