|  | @@ -3,9 +3,10 @@
 | 
	
		
			
				|  |  |  """
 | 
	
		
			
				|  |  |  from datetime import datetime
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +from applications.const import server_const
 | 
	
		
			
				|  |  |  from applications.functions.video_item import VideoProducer
 | 
	
		
			
				|  |  |  from applications.log import logging
 | 
	
		
			
				|  |  | -from applications.match_algorithm.rank import title_similarity_rank
 | 
	
		
			
				|  |  | +from applications.match_algorithm import title_similarity_with_nlp
 | 
	
		
			
				|  |  |  from .spiderAB import SearchABTest
 | 
	
		
			
				|  |  |  from .spiderSchedule import SearchMethod
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -94,7 +95,7 @@ async def search_videos_from_web(info, gh_id_map, db_client):
 | 
	
		
			
				|  |  |      trace_id = info['trace_id']
 | 
	
		
			
				|  |  |      gh_id = info['gh_id']
 | 
	
		
			
				|  |  |      content_id = info['content_id']
 | 
	
		
			
				|  |  | -    recall_list = await search_AB.ab_5()
 | 
	
		
			
				|  |  | +    recall_list = await search_AB.ab_6()
 | 
	
		
			
				|  |  |      logging(
 | 
	
		
			
				|  |  |          code="1006",
 | 
	
		
			
				|  |  |          info="搜索到{}条视频".format(len(recall_list)),
 | 
	
	
		
			
				|  | @@ -102,12 +103,22 @@ async def search_videos_from_web(info, gh_id_map, db_client):
 | 
	
		
			
				|  |  |          trace_id=info['trace_id']
 | 
	
		
			
				|  |  |      )
 | 
	
		
			
				|  |  |      # 按照标题相似度排序
 | 
	
		
			
				|  |  | -    ranked_list = title_similarity_rank(content_title=info['ori_title'].split("@@")[-1], recall_list=recall_list)
 | 
	
		
			
				|  |  | +    ranked_result = await title_similarity_with_nlp(content_title=info['ori_title'].split("@@")[-1], recall_list=recall_list)
 | 
	
		
			
				|  |  | +    rank_alg = ranked_result['alg']
 | 
	
		
			
				|  |  | +    ranked_list = ranked_result['result']
 | 
	
		
			
				|  |  |      for recall_obj in ranked_list:
 | 
	
		
			
				|  |  |          if recall_obj:
 | 
	
		
			
				|  |  |              platform = recall_obj['platform']
 | 
	
		
			
				|  |  |              recall_video = recall_obj['result']
 | 
	
		
			
				|  |  |              score = recall_obj['score']
 | 
	
		
			
				|  |  | +            # 过滤掉jcd分数为0的
 | 
	
		
			
				|  |  | +            if rank_alg == 'jcd' and score == server_const.JCD_SIMILARITY_THRESHOLD:
 | 
	
		
			
				|  |  | +                continue
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +            # 过滤掉nlp分低于0.3的
 | 
	
		
			
				|  |  | +            if rank_alg == 'nlp' and score < server_const.NLP_SIMILARITY_THRESHOLD:
 | 
	
		
			
				|  |  | +                continue
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |              if recall_video:
 | 
	
		
			
				|  |  |                  await save_video_to_mysql(
 | 
	
		
			
				|  |  |                      video_obj=recall_video,
 |