|
@@ -1,16 +1,22 @@
|
|
|
from argparse import ArgumentParser
|
|
|
|
|
|
+from cold_start.crawler.baidu import BaiduVideoCrawler
|
|
|
+from tasks.ai_tasks import run_title_similarity_task
|
|
|
from tasks.crawler_tasks.crawler_video.crawler_piaoquan_videos import (
|
|
|
CrawlerPiaoQuanVideos,
|
|
|
)
|
|
|
-from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import CrawlerSohuHotVideos
|
|
|
+from tasks.crawler_tasks.crawler_video.crawler_toutiao_videos import (
|
|
|
+ CrawlerToutiaoAccountVideos,
|
|
|
+)
|
|
|
from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import (
|
|
|
CrawlerSohuRecommendVideos,
|
|
|
+ CrawlerSohuHotVideos,
|
|
|
)
|
|
|
from tasks.crawler_tasks.crawler_video.crawler_sph_videos import (
|
|
|
CrawlerChannelAccountVideos,
|
|
|
)
|
|
|
from tasks.crawler_tasks.crawler_video.crawler_gzh_videos import CrawlerGzhMetaVideos
|
|
|
+
|
|
|
from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishRecordManager
|
|
|
from tasks.data_tasks.fwh_data_recycle import SaveFwhDataToDatabase
|
|
|
from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishMonitor
|
|
@@ -20,24 +26,41 @@ from tasks.publish_tasks.top_article_generalize import (
|
|
|
)
|
|
|
|
|
|
|
|
|
-def run_piaoquan_video_crawler():
|
|
|
- crawler = CrawlerPiaoQuanVideos()
|
|
|
- crawler.deal()
|
|
|
class CrawlerTasks:
    """Entry points for the video crawler jobs.

    Every method is a classmethod that builds the relevant crawler object
    and calls its ``deal()`` method. None of them takes arguments or
    returns a value.
    """

    @classmethod
    def run_piaoquan_video_crawler(cls):
        """Build a ``CrawlerPiaoQuanVideos`` and call its ``deal()``."""
        CrawlerPiaoQuanVideos().deal()

    @classmethod
    def run_sohu_video_crawler(cls):
        """Run the two Sohu crawlers one after the other.

        ``CrawlerSohuHotVideos`` is processed first, then
        ``CrawlerSohuRecommendVideos``.
        """
        # First pass: the hot-videos crawler.
        CrawlerSohuHotVideos().deal()

        # Second pass: the recommend-videos crawler.
        CrawlerSohuRecommendVideos().deal()

    @classmethod
    def run_sph_video_crawler(cls):
        """Build a ``CrawlerChannelAccountVideos`` and call its ``deal()``."""
        CrawlerChannelAccountVideos().deal()

    @classmethod
    def crawler_gzh_meta_videos(cls):
        """Build a ``CrawlerGzhMetaVideos`` and call its ``deal()``."""
        CrawlerGzhMetaVideos().deal()

    @classmethod
    def run_toutiao_video_crawler(cls):
        """Build a ``CrawlerToutiaoAccountVideos`` and call its ``deal()``."""
        CrawlerToutiaoAccountVideos().deal()

    @classmethod
    def run_baidu_video_crawler(cls):
        """Build a ``BaiduVideoCrawler`` and call its ``deal()``."""
        BaiduVideoCrawler().deal()
|
|
|
|
|
|
|
|
|
def run_fwh_data_manager():
|
|
@@ -59,15 +82,11 @@ def run_top_article_generalize_from_article_pool():
|
|
|
task.deal()
|
|
|
|
|
|
|
|
|
-def crawler_gzh_meta_videos():
|
|
|
- task = CrawlerGzhMetaVideos()
|
|
|
- task.deal()
|
|
|
-
|
|
|
-
|
|
|
def main():
|
|
|
"""
|
|
|
run long_articles_job
|
|
|
"""
|
|
|
+ crawler = CrawlerTasks()
|
|
|
parser = ArgumentParser()
|
|
|
parser.add_argument("--task_name", help="which task you want to run")
|
|
|
parser.add_argument("--run_date", help="task specify run date")
|
|
@@ -81,19 +100,25 @@ def main():
|
|
|
else:
|
|
|
match task_name:
|
|
|
case "run_piaoquan_video_crawler":
|
|
|
- run_piaoquan_video_crawler()
|
|
|
+ crawler.run_piaoquan_video_crawler()
|
|
|
case "run_sohu_video_crawler":
|
|
|
- run_sohu_video_crawler()
|
|
|
+ crawler.run_sohu_video_crawler()
|
|
|
+ case "run_sph_video_crawler":
|
|
|
+ crawler.run_sph_video_crawler()
|
|
|
+ case "crawler_gzh_meta_videos":
|
|
|
+ crawler.crawler_gzh_meta_videos()
|
|
|
+ case "run_toutiao_video_crawler":
|
|
|
+ crawler.run_toutiao_video_crawler()
|
|
|
+ case "run_baidu_video_crawler":
|
|
|
+ crawler.run_baidu_video_crawler()
|
|
|
case "run_check_kimi_balance":
|
|
|
check_kimi_balance()
|
|
|
case "run_fwh_data_manager":
|
|
|
run_fwh_data_manager()
|
|
|
- case "run_sph_video_crawler":
|
|
|
- run_sph_video_crawler()
|
|
|
+ case "run_title_similarity_task":
|
|
|
+ run_title_similarity_task()
|
|
|
case "top_article_generalize":
|
|
|
run_top_article_generalize_from_article_pool()
|
|
|
- case "crawler_gzh_meta_videos":
|
|
|
- crawler_gzh_meta_videos()
|
|
|
case _:
|
|
|
print("task_name cannot be None")
|
|
|
|