|
@@ -3,6 +3,7 @@
|
|
|
"""
|
|
|
from datetime import datetime
|
|
|
|
|
|
+from applications.feishu import bot
|
|
|
from applications.const import server_const
|
|
|
from applications.functions.video_item import VideoProducer
|
|
|
from applications.log import logging
|
|
@@ -11,7 +12,8 @@ from .spiderAB import SearchABTest
|
|
|
from .spiderSchedule import SearchMethod
|
|
|
|
|
|
|
|
|
-async def save_video_to_mysql(video_obj, user, trace_id, platform, content_id, crawler_video_table, db_client, similarity_score):
|
|
|
+async def save_video_to_mysql(video_obj, user, trace_id, platform, content_id, crawler_video_table, db_client,
|
|
|
+ similarity_score):
|
|
|
"""
|
|
|
异步处理微信 video_obj
|
|
|
公众号和站内账号一一对应
|
|
@@ -103,20 +105,26 @@ async def search_videos_from_web(info, gh_id_map, db_client):
|
|
|
trace_id=info['trace_id']
|
|
|
)
|
|
|
# 按照标题相似度排序
|
|
|
- ranked_result = await title_similarity_with_nlp(content_title=info['ori_title'].split("@@")[-1], recall_list=recall_list)
|
|
|
- rank_alg = ranked_result['alg']
|
|
|
+ ranked_result = await title_similarity_with_nlp(content_title=info['ori_title'].split("@@")[-1],
|
|
|
+ recall_list=recall_list)
|
|
|
ranked_list = ranked_result['result']
|
|
|
+ if recall_list and not ranked_list:
|
|
|
+ bot(
|
|
|
+ title="NLP服务请求失败",
|
|
|
+ detail={
|
|
|
+ "trace_id": info['trace_id']
|
|
|
+ },
|
|
|
+ mention=False
|
|
|
+ )
|
|
|
+
|
|
|
for recall_obj in ranked_list:
|
|
|
if recall_obj:
|
|
|
platform = recall_obj['platform']
|
|
|
recall_video = recall_obj['result']
|
|
|
score = recall_obj['score']
|
|
|
- # 过滤掉jcd分数为0的
|
|
|
- if rank_alg == 'jcd' and score == server_const.JCD_SIMILARITY_THRESHOLD:
|
|
|
- continue
|
|
|
|
|
|
- # 过滤掉nlp分低于0.3的
|
|
|
- if rank_alg == 'nlp' and score < server_const.NLP_SIMILARITY_THRESHOLD:
|
|
|
+ # 过滤掉nlp分低于0.45的
|
|
|
+ if score < server_const.NLP_SIMILARITY_THRESHOLD:
|
|
|
continue
|
|
|
|
|
|
if recall_video:
|
|
@@ -130,4 +138,4 @@ async def search_videos_from_web(info, gh_id_map, db_client):
|
|
|
db_client=db_client,
|
|
|
similarity_score=score
|
|
|
)
|
|
|
- return len(ranked_list)
|
|
|
+ return len(ranked_list)
|