|
@@ -3,9 +3,10 @@
|
|
|
"""
|
|
|
from datetime import datetime
|
|
|
|
|
|
+from applications.const import server_const
|
|
|
from applications.functions.video_item import VideoProducer
|
|
|
from applications.log import logging
|
|
|
-from applications.match_algorithm.rank import title_similarity_rank
|
|
|
+from applications.match_algorithm import title_similarity_with_nlp
|
|
|
from .spiderAB import SearchABTest
|
|
|
from .spiderSchedule import SearchMethod
|
|
|
|
|
@@ -94,7 +95,7 @@ async def search_videos_from_web(info, gh_id_map, db_client):
|
|
|
trace_id = info['trace_id']
|
|
|
gh_id = info['gh_id']
|
|
|
content_id = info['content_id']
|
|
|
- recall_list = await search_AB.ab_5()
|
|
|
+ recall_list = await search_AB.ab_6()
|
|
|
logging(
|
|
|
code="1006",
|
|
|
info="搜索到{}条视频".format(len(recall_list)),
|
|
@@ -102,12 +103,22 @@ async def search_videos_from_web(info, gh_id_map, db_client):
|
|
|
trace_id=info['trace_id']
|
|
|
)
|
|
|
# 按照标题相似度排序
|
|
|
- ranked_list = title_similarity_rank(content_title=info['ori_title'].split("@@")[-1], recall_list=recall_list)
|
|
|
+ ranked_result = await title_similarity_with_nlp(content_title=info['ori_title'].split("@@")[-1], recall_list=recall_list)
|
|
|
+ rank_alg = ranked_result['alg']
|
|
|
+ ranked_list = ranked_result['result']
|
|
|
for recall_obj in ranked_list:
|
|
|
if recall_obj:
|
|
|
platform = recall_obj['platform']
|
|
|
recall_video = recall_obj['result']
|
|
|
score = recall_obj['score']
|
|
|
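+ # Skip jcd-ranked candidates whose score equals server_const.JCD_SIMILARITY_THRESHOLD (the original note describes this as a score of 0 — confirm against the constant)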
|
|
|
+ if rank_alg == 'jcd' and score == server_const.JCD_SIMILARITY_THRESHOLD:
|
|
|
+ continue
|
|
|
+
|
|
|
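+ # Skip nlp-ranked candidates scoring below server_const.NLP_SIMILARITY_THRESHOLD (the original note gives 0.3 — confirm against the constant)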
|
|
|
+ if rank_alg == 'nlp' and score < server_const.NLP_SIMILARITY_THRESHOLD:
|
|
|
+ continue
|
|
|
+
|
|
|
if recall_video:
|
|
|
await save_video_to_mysql(
|
|
|
video_obj=recall_video,
|