| 
														
															@@ -3,14 +3,17 @@ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 """ 
														 | 
														
														 | 
														
															 """ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from datetime import datetime 
														 | 
														
														 | 
														
															 from datetime import datetime 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from applications.feishu import bot 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from applications.const import server_const 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from applications.functions.video_item import VideoProducer 
														 | 
														
														 | 
														
															 from applications.functions.video_item import VideoProducer 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from applications.log import logging 
														 | 
														
														 | 
														
															 from applications.log import logging 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-from applications.match_algorithm.rank import title_similarity_rank 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from applications.match_algorithm import title_similarity_with_nlp 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from .spiderAB import SearchABTest 
														 | 
														
														 | 
														
															 from .spiderAB import SearchABTest 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from .spiderSchedule import SearchMethod 
														 | 
														
														 | 
														
															 from .spiderSchedule import SearchMethod 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															-async def save_video_to_mysql(video_obj, user, trace_id, platform, content_id, crawler_video_table, db_client, similarity_score): 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+async def save_video_to_mysql(video_obj, user, trace_id, platform, content_id, crawler_video_table, db_client, 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                              similarity_score): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     """ 
														 | 
														
														 | 
														
															     """ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     异步处理微信 video_obj 
														 | 
														
														 | 
														
															     异步处理微信 video_obj 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     公众号和站内账号一一对应 
														 | 
														
														 | 
														
															     公众号和站内账号一一对应 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -94,7 +97,7 @@ async def search_videos_from_web(info, gh_id_map, db_client): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     trace_id = info['trace_id'] 
														 | 
														
														 | 
														
															     trace_id = info['trace_id'] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     gh_id = info['gh_id'] 
														 | 
														
														 | 
														
															     gh_id = info['gh_id'] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     content_id = info['content_id'] 
														 | 
														
														 | 
														
															     content_id = info['content_id'] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    recall_list = await search_AB.ab_5() 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    recall_list = await search_AB.ab_6() 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     logging( 
														 | 
														
														 | 
														
															     logging( 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         code="1006", 
														 | 
														
														 | 
														
															         code="1006", 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         info="搜索到{}条视频".format(len(recall_list)), 
														 | 
														
														 | 
														
															         info="搜索到{}条视频".format(len(recall_list)), 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -102,12 +105,28 @@ async def search_videos_from_web(info, gh_id_map, db_client): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         trace_id=info['trace_id'] 
														 | 
														
														 | 
														
															         trace_id=info['trace_id'] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     ) 
														 | 
														
														 | 
														
															     ) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     # 按照标题相似度排序 
														 | 
														
														 | 
														
															     # 按照标题相似度排序 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    ranked_list = title_similarity_rank(content_title=info['ori_title'].split("@@")[-1], recall_list=recall_list) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    ranked_result = await title_similarity_with_nlp(content_title=info['ori_title'].split("@@")[-1], 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                                                    recall_list=recall_list) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    ranked_list = ranked_result['result'] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    if recall_list and not ranked_list: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        bot( 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            title="NLP服务请求失败", 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            detail={ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                "trace_id": info['trace_id'] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            }, 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            mention=False 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        ) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     for recall_obj in ranked_list: 
														 | 
														
														 | 
														
															     for recall_obj in ranked_list: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         if recall_obj: 
														 | 
														
														 | 
														
															         if recall_obj: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             platform = recall_obj['platform'] 
														 | 
														
														 | 
														
															             platform = recall_obj['platform'] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             recall_video = recall_obj['result'] 
														 | 
														
														 | 
														
															             recall_video = recall_obj['result'] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             score = recall_obj['score'] 
														 | 
														
														 | 
														
															             score = recall_obj['score'] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            # 过滤掉nlp分低于0.45的 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            if score < server_const.NLP_SIMILARITY_THRESHOLD: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                continue 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             if recall_video: 
														 | 
														
														 | 
														
															             if recall_video: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                 await save_video_to_mysql( 
														 | 
														
														 | 
														
															                 await save_video_to_mysql( 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                     video_obj=recall_video, 
														 | 
														
														 | 
														
															                     video_obj=recall_video, 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -119,4 +138,4 @@ async def search_videos_from_web(info, gh_id_map, db_client): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                     db_client=db_client, 
														 | 
														
														 | 
														
															                     db_client=db_client, 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                     similarity_score=score 
														 | 
														
														 | 
														
															                     similarity_score=score 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                 ) 
														 | 
														
														 | 
														
															                 ) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    return len(ranked_list) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    return len(ranked_list) 
														 |