
Read Rule_dict from MySQL

罗俊辉 committed 1 year ago
4ad0d31633

+ 16 - 3
common/public.py

@@ -10,10 +10,13 @@ import os, sys, jieba
 import time
 import random
 import difflib
+
 sys.path.append(os.getcwd())
 from common.common import Common
 from common.feishu import Feishu
 from common.scheduling_db import MysqlHelper
+
+
 # from common import Common
 # from feishu import Feishu
 # from scheduling_db import MysqlHelper
@@ -28,6 +31,7 @@ def get_user_from_mysql(log_type, crawler, source, env, action=''):
         Common.logger(log_type, crawler).warning(f"crawler: {crawler}, no crawl user list found")
         return []
 
+
 def similarity(title1, title2):
     # word segmentation (jieba)
     seg1 = jieba.lcut(title1)
@@ -41,6 +45,7 @@ def similarity(title1, title2):
     similar = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
     return similar
 
+
 def title_like(log_type, crawler, platform, title, env):
     """
     Title similarity
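
The hunks above show only the edges of similarity(); the TF-IDF construction in the middle lies outside the diff context. A plausible completion for readers who want to run the check standalone, assuming scikit-learn's TfidfVectorizer (the import is not visible in this diff):

    import jieba
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    def similarity(title1, title2):
        # Word segmentation, as in the hunk above.
        seg1 = jieba.lcut(title1)
        seg2 = jieba.lcut(title2)
        # Assumed middle: join tokens with spaces so the vectorizer can
        # split on whitespace, then build TF-IDF vectors for both titles.
        tfidf_matrix = TfidfVectorizer().fit_transform(
            [" ".join(seg1), " ".join(seg2)])
        # Cosine similarity between the two row vectors, as shown above.
        similar = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
        return similar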
@@ -103,6 +108,13 @@ def get_config_from_mysql(log_type, source, env, text, action=''):
         return search_word_list
 
 
+def get_rule_from_mysql(task_id, log_type, crawler, env):
+    select_rule_sql = f"""select rule from crawler_task_v3 where id={task_id}"""
+    rule_list = MysqlHelper.get_values(log_type, crawler, select_rule_sql, env, action="")
+    # print(rule_list)
+    return rule_list
+
+
 def random_title(log_type, crawler, env, text):
     random_title_list = get_config_from_mysql(log_type, crawler, env, text)
     return random.choice(random_title_list)
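
get_rule_from_mysql returns whatever rows MysqlHelper.get_values yields for the rule column; nothing in this commit parses them. A minimal sketch of how the result might be flattened into the rule_dict shape that download_rule expects, assuming the column stores a JSON array of single-key objects and that rows come back as dicts keyed by column name (both assumptions, not confirmed by this diff):

    import json

    def rule_rows_to_dict(rule_rows):
        # Hypothetical helper: merge [{"period": {...}}, {"play_cnt": {...}}]
        # (assumed storage format of crawler_task_v3.rule) into one dict.
        rule_dict = {}
        for row in rule_rows:  # row assumed to look like {"rule": "<json>"}
            for item in json.loads(row["rule"]):
                rule_dict.update(item)
        return rule_dict

    # e.g. {"period": {"min": 0, "max": 365}, "play_cnt": {"min": 500, "max": 0}}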
@@ -203,10 +215,12 @@ def download_rule(log_type, crawler, video_dict, rule_dict):
         for rule_key, rule_value in rule_dict.items():
             if video_key == rule_key == "period":
                 result = 0 <= int(video_value) <= int(rule_value["max"])
-                Common.logger(log_type, crawler).info(f'{video_key}: 0 <= {video_value} <= {rule_value["min"]}, {result}')
+                Common.logger(log_type, crawler).info(
+                    f'{video_key}: 0 <= {video_value} <= {rule_value["max"]}, {result}')
             elif video_key == rule_key:
                 result = int(rule_value["min"]) <= int(video_value) <= int(rule_value["max"])
-                Common.logger(log_type, crawler).info(f'{video_key}: {rule_value["min"]} <= {video_value} <= {rule_value["max"]},{result}')
+                Common.logger(log_type, crawler).info(
+                    f'{video_key}: {rule_value["min"]} <= {video_value} <= {rule_value["max"]},{result}')
             else:
                 result = True
 
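The comparison logic above reads more easily outside the loop. A self-contained sketch of the same min/max check, with sample values invented for illustration:

    video_dict = {"period": 3, "play_cnt": 1200}    # sample video stats
    rule_dict = {"period": {"min": 0, "max": 365},  # sample rules
                 "play_cnt": {"min": 500, "max": 100000}}

    for key, value in video_dict.items():
        rule = rule_dict.get(key)
        if rule is None:
            continue  # no rule for this field: treated as passing
        if key == "period":
            # period is only bounded above, per the branch in download_rule
            result = 0 <= int(value) <= int(rule["max"])
        else:
            result = int(rule["min"]) <= int(value) <= int(rule["max"])
        print(f"{key}: {result}")  # period: True, play_cnt: True
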
@@ -293,7 +307,6 @@ def task_unbind(log_type, crawler, taskid, uids, env):
         return response.text
 
 
-
 if __name__ == "__main__":
     print(get_title_score("recommend", "kuaishou", "16QspO", "0usaDk", '像梦一场'))
     pass

+ 9 - 8
xiaoniangaoplus/xiaoniangaoplus_main/run_xngplus_recommend.py

@@ -9,7 +9,7 @@ from mq_http_sdk.mq_exception import MQExceptionBase
 import multiprocessing
 
 sys.path.append(os.getcwd())
-from common.public import get_consumer, ack_message, task_fun_mq
+from common.public import get_consumer, ack_message, task_fun_mq, get_rule_from_mysql
 from common.common import Common
 from common.scheduling_db import MysqlHelper
 # from xiaoniangaoplus.xiaoniangaoplus.xiaoniangao_plus_scheduling import XiaoNianGaoPlusRecommend
@@ -100,7 +100,8 @@ def main(log_type, crawler, topic_name, group_id, env):
                         process.terminate()
                         os.system("adb forward --remove-all")
                         time.sleep(60)
-                        process = multiprocessing.Process(target=run, args=(log_type, crawler, env, rule_dict, our_uid))
+                        new_r = get_rule_from_mysql(task_id=task_id, log_type=log_type, crawler=crawler, env=env)
+                        process = multiprocessing.Process(target=run, args=(log_type, crawler, env, new_r, our_uid))
                         process.start()
                     time.sleep(60)
                 # XiaoNianGaoPlusRecommend.start_wechat(log_type=log_type,
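
This hunk is the point of the commit: when the crawler subprocess dies, the parent now re-reads the task's rules from MySQL before restarting it, so rule edits in crawler_task_v3 take effect without a redeploy. The pattern in isolation, with names simplified for illustration:

    import multiprocessing
    import time

    def supervise(run, fetch_rules, interval=60):
        # Hypothetical supervisor: restart `run` with freshly fetched
        # rules whenever the worker process exits.
        proc = multiprocessing.Process(target=run, args=(fetch_rules(),))
        proc.start()
        while True:
            if not proc.is_alive():
                time.sleep(interval)
                new_rules = fetch_rules()  # e.g. get_rule_from_mysql(...)
                proc = multiprocessing.Process(target=run, args=(new_rules,))
                proc.start()
            time.sleep(interval)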
@@ -109,12 +110,12 @@ def main(log_type, crawler, topic_name, group_id, env):
                 #                                       our_uid=our_uid,
                 #                                       env=env)
                 # Common.del_logs(log_type, crawler)
-                Common.logger(log_type, crawler).info('Crawl round finished\n')
-                Common.logging(log_type, crawler, env, 'Crawl round finished\n')
-                xng_play_end_time = int(time.time())
-                xng_play_duration = xng_play_start_time - xng_play_end_time
-                Common.logger(log_type, crawler).info(f"duration {xng_play_duration}")
-                Common.logging(log_type, crawler, env, f"duration {xng_play_duration}")
+                # Common.logger(log_type, crawler).info('Crawl round finished\n')
+                # Common.logging(log_type, crawler, env, 'Crawl round finished\n')
+                # xng_play_end_time = int(time.time())
+                # xng_play_duration = xng_play_start_time - xng_play_end_time
+                # Common.logger(log_type, crawler).info(f"duration {xng_play_duration}")
+                # Common.logging(log_type, crawler, env, f"duration {xng_play_duration}")
         except MQExceptionBase as err:
             # No messages in the Topic to consume.
             if err.type == "MessageNotExist":