|
@@ -1,6 +1,6 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
|
-# @Author:
|
|
|
-# @Time: 2023/9/7
|
|
|
+# @Author: luojunhui
|
|
|
+# @Time: 2023/10/18
|
|
|
import argparse
|
|
|
import random
|
|
|
import multiprocessing
|
|
@@ -9,21 +9,19 @@ from mq_http_sdk.mq_client import *
|
|
|
from mq_http_sdk.mq_consumer import *
|
|
|
from mq_http_sdk.mq_exception import MQExceptionBase
|
|
|
|
|
|
-
|
|
|
sys.path.append(os.getcwd())
|
|
|
from common.common import Common
|
|
|
from common.public import get_consumer, ack_message, task_fun_mq, get_rule_from_mysql
|
|
|
from common.scheduling_db import MysqlHelper
|
|
|
-from huanhuanxixizhufudao.huanhuanxixizhufudao_recommend.huanhuanxixizhufudao_recommend import HhxxzfdRecommend
|
|
|
-
|
|
|
+from huanhuanxixizhufudao.huanhuanxixizhufudao_recommend import HHXXZFDScheduling
|
|
|
|
|
|
|
|
|
def run(args1, args2, args3, args4, args5):
|
|
|
- HhxxzfdRecommend.start_wechat(log_type=args1,
|
|
|
- crawler=args2,
|
|
|
- rule_dict=args3,
|
|
|
- our_uid=args4,
|
|
|
- env=args5)
|
|
|
+ HHXXZFDScheduling(log_type=args1,
|
|
|
+ crawler=args2,
|
|
|
+ rule_dict=args3,
|
|
|
+ our_uid=args4,
|
|
|
+ env=args5)
|
|
|
|
|
|
|
|
|
class HhxxzfdMain:
|
|
@@ -96,40 +94,21 @@ class HhxxzfdMain:
|
|
|
r_d[k] = val
|
|
|
Common.logger(log_type, crawler).info(f"抓取规则:{r_d}")
|
|
|
Common.logging(log_type, crawler, env, f"抓取规则:{r_d}")
|
|
|
- process = multiprocessing.Process(
|
|
|
- target=run,
|
|
|
- args=(log_type, crawler, rule_dict, our_uid, env)
|
|
|
+ # 初始化
|
|
|
+ HHXXZFD = HHXXZFDScheduling(
|
|
|
+ log_type=log_type,
|
|
|
+ crawler=crawler,
|
|
|
+ env=env,
|
|
|
+ rule_dict=r_d,
|
|
|
+ our_uid=our_uid
|
|
|
)
|
|
|
- process.start()
|
|
|
- print("进程开始")
|
|
|
-
|
|
|
- for i in range(10):
|
|
|
- if not process.is_alive():
|
|
|
-
|
|
|
- print("进程异常,准备重启")
|
|
|
- process.terminate()
|
|
|
- os.system("adb forward --remove-all")
|
|
|
+ for i in range(20):
|
|
|
+ if HHXXZFD.download_count >= int(rule_dict.get("videos_cnt", {}).get("min", 10)):
|
|
|
+ HHXXZFD.download_count = 0
|
|
|
+ break
|
|
|
+ else:
|
|
|
+ HHXXZFD.get_videoList(page_id=i + 1, page_limit=10)
|
|
|
time.sleep(60)
|
|
|
- new_r = get_rule_from_mysql(task_id=task_id, log_type=log_type, crawler=crawler, env=env)
|
|
|
- r_d = {}
|
|
|
- for item in new_r:
|
|
|
- for k, val in item.items():
|
|
|
- r_d[k] = val
|
|
|
- Common.logger(log_type, crawler).info(f"抓取规则:{r_d}")
|
|
|
- Common.logging(log_type, crawler, env, f"抓取规则:{r_d}")
|
|
|
- process = multiprocessing.Process(
|
|
|
- target=run,
|
|
|
- args=(log_type, crawler, rule_dict, our_uid, env)
|
|
|
- )
|
|
|
- process.start()
|
|
|
- time.sleep(60)
|
|
|
-
|
|
|
- # # 抓取符合规则的视频列表
|
|
|
- # ZFQZRecommend.start_wechat(log_type=log_type,
|
|
|
- # crawler=crawler,
|
|
|
- # rule_dict=rule_dict,
|
|
|
- # our_uid=our_uid,
|
|
|
- # env=env)
|
|
|
Common.logger(log_type, crawler).info('抓取一轮结束\n')
|
|
|
Common.logging(log_type, crawler, env, '抓取一轮结束\n')
|
|
|
|
|
@@ -155,7 +134,7 @@ if __name__ == "__main__":
|
|
|
parser.add_argument('--env') ## 添加参数
|
|
|
args = parser.parse_args() ### 参数赋值,也可以通过终端赋值
|
|
|
HhxxzfdMain.main(log_type=args.log_type,
|
|
|
- crawler=args.crawler,
|
|
|
- topic_name=args.topic_name,
|
|
|
- group_id=args.group_id,
|
|
|
- env=args.env)
|
|
|
+ crawler=args.crawler,
|
|
|
+ topic_name=args.topic_name,
|
|
|
+ group_id=args.group_id,
|
|
|
+ env=args.env)
|