|
@@ -3,14 +3,17 @@
|
|
|
"""
|
|
|
from datetime import datetime
|
|
|
|
|
|
+from applications.feishu import bot
|
|
|
+from applications.const import server_const
|
|
|
from applications.functions.video_item import VideoProducer
|
|
|
from applications.log import logging
|
|
|
-from applications.match_algorithm.rank import title_similarity_rank
|
|
|
+from applications.match_algorithm import title_similarity_with_nlp
|
|
|
from .spiderAB import SearchABTest
|
|
|
from .spiderSchedule import SearchMethod
|
|
|
|
|
|
|
|
|
-async def save_video_to_mysql(video_obj, user, trace_id, platform, content_id, crawler_video_table, db_client, similarity_score):
|
|
|
+async def save_video_to_mysql(video_obj, user, trace_id, platform, content_id, crawler_video_table, db_client,
|
|
|
+ similarity_score):
|
|
|
"""
|
|
|
异步处理微信 video_obj
|
|
|
公众号和站内账号一一对应
|
|
@@ -94,7 +97,7 @@ async def search_videos_from_web(info, gh_id_map, db_client):
|
|
|
trace_id = info['trace_id']
|
|
|
gh_id = info['gh_id']
|
|
|
content_id = info['content_id']
|
|
|
- recall_list = await search_AB.ab_5()
|
|
|
+ recall_list = await search_AB.ab_6()
|
|
|
logging(
|
|
|
code="1006",
|
|
|
info="搜索到{}条视频".format(len(recall_list)),
|
|
@@ -102,12 +105,28 @@ async def search_videos_from_web(info, gh_id_map, db_client):
|
|
|
trace_id=info['trace_id']
|
|
|
)
|
|
|
# 按照标题相似度排序
|
|
|
- ranked_list = title_similarity_rank(content_title=info['ori_title'].split("@@")[-1], recall_list=recall_list)
|
|
|
+ ranked_result = await title_similarity_with_nlp(content_title=info['ori_title'].split("@@")[-1],
|
|
|
+ recall_list=recall_list)
|
|
|
+ ranked_list = ranked_result['result']
|
|
|
+ if recall_list and not ranked_list:
|
|
|
+ bot(
|
|
|
+ title="NLP服务请求失败",
|
|
|
+ detail={
|
|
|
+ "trace_id": info['trace_id']
|
|
|
+ },
|
|
|
+ mention=False
|
|
|
+ )
|
|
|
+
|
|
|
for recall_obj in ranked_list:
|
|
|
if recall_obj:
|
|
|
platform = recall_obj['platform']
|
|
|
recall_video = recall_obj['result']
|
|
|
score = recall_obj['score']
|
|
|
+
|
|
|
+ # 过滤掉nlp分低于0.45的
|
|
|
+ if score < server_const.NLP_SIMILARITY_THRESHOLD:
|
|
|
+ continue
|
|
|
+
|
|
|
if recall_video:
|
|
|
await save_video_to_mysql(
|
|
|
video_obj=recall_video,
|
|
@@ -119,4 +138,4 @@ async def search_videos_from_web(info, gh_id_map, db_client):
|
|
|
db_client=db_client,
|
|
|
similarity_score=score
|
|
|
)
|
|
|
- return len(ranked_list)
|
|
|
+ return len(ranked_list)
|