| 
					
				 | 
			
			
				@@ -3,14 +3,17 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from datetime import datetime 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from applications.feishu import bot 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from applications.const import server_const 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from applications.functions.video_item import VideoProducer 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from applications.log import logging 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from applications.match_algorithm.rank import title_similarity_rank 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from applications.match_algorithm import title_similarity_with_nlp 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from .spiderAB import SearchABTest 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from .spiderSchedule import SearchMethod 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-async def save_video_to_mysql(video_obj, user, trace_id, platform, content_id, crawler_video_table, db_client, similarity_score): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+async def save_video_to_mysql(video_obj, user, trace_id, platform, content_id, crawler_video_table, db_client, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                              similarity_score): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     异步处理微信 video_obj 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     公众号和站内账号一一对应 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -94,7 +97,7 @@ async def search_videos_from_web(info, gh_id_map, db_client): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     trace_id = info['trace_id'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     gh_id = info['gh_id'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     content_id = info['content_id'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    recall_list = await search_AB.ab_5() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    recall_list = await search_AB.ab_6() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         code="1006", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         info="搜索到{}条视频".format(len(recall_list)), 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -102,12 +105,28 @@ async def search_videos_from_web(info, gh_id_map, db_client): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         trace_id=info['trace_id'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 按照标题相似度排序 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    ranked_list = title_similarity_rank(content_title=info['ori_title'].split("@@")[-1], recall_list=recall_list) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    ranked_result = await title_similarity_with_nlp(content_title=info['ori_title'].split("@@")[-1], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                                    recall_list=recall_list) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    ranked_list = ranked_result['result'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if recall_list and not ranked_list: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        bot( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            title="NLP服务请求失败", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            detail={ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "trace_id": info['trace_id'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            mention=False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for recall_obj in ranked_list: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if recall_obj: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             platform = recall_obj['platform'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             recall_video = recall_obj['result'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             score = recall_obj['score'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # Skip candidates whose NLP similarity score is below server_const.NLP_SIMILARITY_THRESHOLD
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if score < server_const.NLP_SIMILARITY_THRESHOLD: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if recall_video: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 await save_video_to_mysql( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     video_obj=recall_video, 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -119,4 +138,4 @@ async def search_videos_from_web(info, gh_id_map, db_client): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     db_client=db_client, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     similarity_score=score 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return len(ranked_list) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return len(ranked_list) 
			 |