@@ -10,10 +10,13 @@ import os, sys, jieba
 import time
 import random
 import difflib
+
 sys.path.append(os.getcwd())
 from common.common import Common
 from common.feishu import Feishu
 from common.scheduling_db import MysqlHelper
+
+
 # from common import Common
 # from feishu import Feishu
 # from scheduling_db import MysqlHelper
@@ -28,6 +31,7 @@ def get_user_from_mysql(log_type, crawler, source, env, action=''):
         Common.logger(log_type, crawler).warning(f"爬虫:{crawler},没有查到抓取名单")
         return []

+
 def similarity(title1, title2):
     # 分词
     seg1 = jieba.lcut(title1)
@@ -41,6 +45,7 @@ def similarity(title1, title2):
     similar = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
     return similar

+
def title_like(log_type, crawler, platform, title, env):
    """
    标题相似度
@@ -103,6 +108,15 @@ def get_config_from_mysql(log_type, source, env, text, action=''):
     return search_word_list


+def get_rule_from_mysql(task_id, log_type, crawler, env):
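+    # Look up the crawl rule configured for this task in the crawler_task_v3 table.
+    # Note: task_id is interpolated into the SQL string directly, so callers must pass a trusted integer id.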
+    select_rule_sql = f"""select rule from crawler_task_v3 where id={task_id}"""
+    rule_list = MysqlHelper.get_values(log_type, crawler, select_rule_sql, env, action="")
+    # print(rule_list)
+    return rule_list
+
+
 def random_title(log_type, crawler, env, text):
     random_title_list = get_config_from_mysql(log_type, crawler, env, text)
     return random.choice(random_title_list)
@@ -203,10 +215,12 @@ def download_rule(log_type, crawler, video_dict, rule_dict):
         for rule_key, rule_value in rule_dict.items():
             if video_key == rule_key == "period":
                 result = 0 <= int(video_value) <= int(rule_value["max"])
-                Common.logger(log_type, crawler).info(f'{video_key}: 0 <= {video_value} <= {rule_value["min"]}, {result}')
+                Common.logger(log_type, crawler).info(
+                    f'{video_key}: 0 <= {video_value} <= {rule_value["max"]}, {result}')
             elif video_key == rule_key:
                 result = int(rule_value["min"]) <= int(video_value) <= int(rule_value["max"])
-                Common.logger(log_type, crawler).info(f'{video_key}: {rule_value["min"]} <= {video_value} <= {rule_value["max"]},{result}')
+                Common.logger(log_type, crawler).info(
+                    f'{video_key}: {rule_value["min"]} <= {video_value} <= {rule_value["max"]},{result}')
             else:
                 result = True

@@ -293,7 +307,6 @@ def task_unbind(log_type, crawler, taskid, uids, env):
     return response.text


-
 if __name__ == "__main__":
     print(get_title_score("recommend", "kuaishou", "16QspO", "0usaDk", '像梦一场'))
     pass