wangkun před 2 roky
rodič
revize
c02ca0e5a9

+ 0 - 1
gongzhonghao/gongzhonghao_author/gongzhonghao_author.py

@@ -307,7 +307,6 @@ class GongzhonghaoAuthor:
                             video_dict["crawler_rule"] = json.dumps(rule_dict)
                             video_dict["user_id"] = user_dict["uid"]  # 站内 UID?爬虫获取不到了(随机发布到原 5 个账号中)
                             video_dict["publish_time"] = video_dict["publish_time_str"]
-
                             mq.send_msg(video_dict)
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")

+ 5 - 1
gongzhonghao/gongzhonghao_main/run_gzh_author.py

@@ -71,18 +71,22 @@ def main(log_type, crawler, topic_name, group_id, env):
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
 
-                # 处理爬虫业务
+                # 解析 task_dict
                 task_dict = task_fun_mq(msg.message_body)['task_dict']
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
                 Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
 
+                # 解析 rule_dict
                 rule_dict = task_fun_mq(msg.message_body)['rule_dict']
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}\n")
                 Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}\n")
 
+                # 解析 user_list
                 task_id = task_dict['id']
                 select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
                 user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
+
+                # 计算启动脚本数 crawler_num
                 user_num = len(user_list)
                 chunk_size = 100  # 每个进程处理的用户数量
                 crawler_num = int(user_num // chunk_size)  # 向下取整

+ 1 - 1
xigua/xigua_main/run_xg_search.py

@@ -9,7 +9,7 @@ sys.path.append(os.getcwd())
 from common.common import Common
 from common.public import get_consumer, ack_message, task_fun_mq
 from common.scheduling_db import MysqlHelper
-from xigua.xigua_search.xigua_search_scheduling import XiguasearchScheduling
+from xigua.xigua_search.xigua_search_scheduling0628 import XiguasearchScheduling
 
 
 def main(log_type, crawler, topic_name, group_id, env):

+ 2 - 1
xigua/xigua_search/xigua_search_scheduling0628.py

@@ -23,7 +23,7 @@ from common.scheduling_db import MysqlHelper
 from common.common import Common
 from common.feishu import Feishu
 from common.publish import Publish
-from common.public import get_config_from_mysql, download_rule, get_title_score
+from common.public import get_config_from_mysql, download_rule
 from common.userAgent import get_random_user_agent
 
 
@@ -686,6 +686,7 @@ class XiguasearchScheduling:
                         video_dict["strategy_type"] = log_type
                         mq.send_msg(video_dict)
                         cls.download_cnt += 1
+                        Common.logging(log_type, crawler, env, "已下载视频数+1")
 
                 except Exception as e:
                     Common.logger(log_type, crawler).warning(f"抓取单条视频异常:{e}\n")