linfan 1 سال پیش
والد
کامیت
dc9a8013c0
4 فایلهای تغییر یافته به همراه 64 افزوده شده و 62 حذف شده
  1. 37 47
      recommend.py
  2. 10 1
      utils.py
  3. 4 2
      video_rank.py
  4. 13 12
      video_recall.py

+ 37 - 47
recommend.py

@@ -363,6 +363,7 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     """
     #1. recall
     result = {}
+    result['rankResult'] =  []
     # ####### 多进程召回
     start_recall = time.time()
 
@@ -380,8 +381,10 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
         region_code = province_code
     if region_code == '':
         region_code = '-1'
+    
+    print("region_code:", region_code)
 
-    size =1000
+    #size =1000
     pool_recall = PoolRecall(request_id=request_id,
                              app_type=app_type, mid=mid, uid=uid, ab_code=ab_code,
                              client_info=client_info, rule_key=rule_key, data_key=data_key, no_op_flag=no_op_flag,
@@ -389,22 +392,32 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
         t = [gevent.spawn(pool_recall.get_region_hour_recall, size, region_code),
              gevent.spawn(pool_recall.get_region_day_recall, size, region_code),
-             gevent.spawn(pool_recall.get_selected_recall, size),
-             gevent.spawn(pool_recall.get_no_selected_recall, size)
+             gevent.spawn(pool_recall.get_selected_recall, size, region_code),
+             gevent.spawn(pool_recall.get_no_selected_recall, size, region_code)
              ]
     else:
-        t = [gevent.spawn(pool_recall.get_region_hour_recall, size),
-             gevent.spawn(pool_recall.get_region_day_recall, size),
-             gevent.spawn(pool_recall.get_selected_recall, size),
-             gevent.spawn(pool_recall.get_no_selected_recall, size),
+        t = [gevent.spawn(pool_recall.get_region_hour_recall, size, region_code),
+             gevent.spawn(pool_recall.get_region_day_recall, size, region_code),
+             gevent.spawn(pool_recall.get_selected_recall, size, region_code),
+             gevent.spawn(pool_recall.get_no_selected_recall, size, region_code),
              gevent.spawn(pool_recall.flow_pool_recall, size, config_.QUICK_FLOW_POOL_ID),
              gevent.spawn(pool_recall.flow_pool_recall, size),
              gevent.spawn(pool_recall.get_sim_hot_item_reall)]
     gevent.joinall(t)
     # all recall_result
     all_recall_result_list = [i.get() for i in t]
+    all_recall_result = []
     result['recallTime'] = (time.time() - start_recall) * 1000
 
+    if not all_recall_result_list or len(all_recall_result_list)==0:
+        return result
+    for recall_item in all_recall_result_list:
+        if not recall_item or len(recall_item)==0:
+            continue
+        for per_item in recall_item:
+            all_recall_result.append(per_item)
+
+    #print("all_recall_result:", all_recall_result)
     #2. duplicate
     recall_dict = {}
     fast_flow_set = set('')
@@ -414,15 +427,18 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     region_day_recall = []
     select_day_recall = []
     no_selected_recall  = []
-    for per_item in all_recall_result_list:
+    for per_item in all_recall_result:
+        #print(per_item)
         vId = per_item.get("videoId",'0')
         if vId=='0':
             continue
         recall_name = per_item.get("pushFrom",'')
         if recall_name=='fast_flow_recall':
             fast_flow_set.add(vId)
+            all_flow_set.add(vId)
         if recall_name=='flow_recall':
             flow_flow_set.add(vId)
+            all_flow_set.add(vId)
         #duplicate divide into
         if vId not in recall_dict:
             if recall_name == config_.PUSH_FROM['rov_recall_region_h']:
@@ -438,19 +454,21 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
         else:
             recall_name = recall_dict[vId] + "," + recall_name
             recall_dict[vId] = recall_name
-    all_flow_set.add(fast_flow_set)
-    all_flow_set.add(flow_flow_set)
+    #all_flow_set = set.union(fast_flow_set, flow_flow_set)
     #3. filter video, 先过预曝光
     filter_ = FilterVideos(request_id=request_id,
-                           app_type=app_type, mid=mid, uid=uid, video_ids=recall_dict.keys())
+                           app_type=app_type, mid=mid, uid=uid, video_ids=list(recall_dict.keys()))
     #a).expose filter
     expose_filterd_videos = filter_.new_filter_video()
+    print("------------------expose_filterd_videos------------------------------")
+    print("expose_filterd_videos:",expose_filterd_videos)
     if expose_filterd_videos is None:
-        return
+        return result
     #b). sep_filter
     normal_video_list, flow_video_list = filter_.new_flow_video(expose_filterd_videos, all_flow_set, region_code, shield_config)
-    if len(normal_video_list) and len(flow_video_list)==0:
-        return
+    print("normal_video_list:", normal_video_list, "flow_video_list:", flow_video_list)
+    if len(normal_video_list)==0 and len(flow_video_list)==0:
+        return result
     #4. sort: old sort: flow 按概率出
     start_rank = time.time()
     #quick_flow_pool_P get from redis
@@ -475,38 +493,6 @@ def new_video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
 
     result['rankResult'] = rank_result
     result['rankTime'] = (time.time() - start_rank) * 1000
-
-    # if not rank_result:
-    #     # 兜底策略
-    #     # log_.info('====== bottom strategy')
-    #     start_bottom = time.time()
-    #     rank_result = bottom_strategy2(
-    #         size=size, app_type=app_type, mid=mid, uid=uid, ab_code=ab_code, client_info=client_info, params=params
-    #     )
-    #
-    #     # if ab_code == config_.AB_CODE['region_rank_by_h'].get('abtest_130'):
-    #     #     rank_result = bottom_strategy2(
-    #     #         size=size, app_type=app_type, mid=mid, uid=uid, ab_code=ab_code, client_info=client_info, params=params
-    #     #     )
-    #     # else:
-    #     #     rank_result = bottom_strategy(
-    #     #         request_id=request_id, size=size, app_type=app_type, ab_code=ab_code, params=params
-    #     #     )
-    #
-    #     # log_.info({
-    #     #     'logTimestamp': int(time.time() * 1000),
-    #     #     'request_id': request_id,
-    #     #     'mid': mid,
-    #     #     'uid': uid,
-    #     #     'operation': 'bottom',
-    #     #     'bottom_result': rank_result,
-    #     #     'executeTime': (time.time() - start_bottom) * 1000
-    #     # })
-    #     result['bottomResult'] = rank_result
-    #     result['bottomTime'] = (time.time() - start_bottom) * 1000
-    #
-    # result['rankResult'] = rank_result
-
     return result
     # return rank_result, last_rov_recall_key
 
@@ -1322,7 +1308,9 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
 
     # 简单召回 - 排序 - 兜底
     get_result_st = time.time()
+    print("ab_code:", ab_code)
     if ab_code == "60047":
+        print("ab_code:", ab_code)
         result = new_video_recommend(request_id=request_id,
                              mid=mid, uid=uid, app_type=app_type,
                              size=size, top_K=top_K, flow_pool_P=flow_pool_P,
@@ -1424,7 +1412,9 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
 
     # 简单召回 - 排序 - 兜底
     get_result_st = time.time()
-    if ab_code == "60047":
+    print("ab_code:", ab_code)
+    if ab_code == 60047:
+        #print("new_video_recommend:", new_video_recommend)
         result = new_video_recommend(request_id=request_id,
                                  mid=mid, uid=uid, app_type=app_type,
                                  size=size, top_K=top_K, flow_pool_P=flow_pool_P,

+ 10 - 1
utils.py

@@ -377,7 +377,9 @@ class FilterVideos(object):
         redis_helper = RedisHelper()
         # key拼接
         key_name = f"{config_.PREVIEW_KEY_PREFIX}{self.app_type}:{self.mid}"
+        print("key_name:", key_name)
         pe_videos_list = redis_helper.get_data_from_set(key_name)
+        print("pe_videos_list:", pe_videos_list)
         # log_.info('****app_type = {}, mid = {}, uid = {}, pe_videos_list = {}'.format(
         #     self.app_type, self.mid, self.uid, pe_videos_list))
         # log_.info('****app_type = {}, mid = {}, uid = {}, video_ids = {}'.format(
@@ -385,6 +387,7 @@ class FilterVideos(object):
         if not pe_videos_list:
             return video_ids
         pe_videos = [int(video) for video in pe_videos_list]
+        print("pe_videos:", pe_videos)
         filtered_videos = [video_id for video_id in video_ids if video_id not in pe_videos]
         return filtered_videos
 
@@ -439,10 +442,13 @@ class FilterVideos(object):
                         "uid": self.uid,
                         "types": list(types),
                         "videoIds": video_ids}
+        print(request_data)
         # 调用http接口
         result = request_post(request_url=config_.VIDEO_FILTER_URL, request_data=request_data, timeout=(0.1, 1))
 
+        print("result:", result)
         if result is None:
+            print("result is None")
             # log_.info('过滤失败,types: {}'.format(types))
             return []
 
@@ -480,6 +486,7 @@ class FilterVideos(object):
         """视频过滤"""
         # 1. 预曝光过滤
         st_pre = time.time()
+        #print("new_filter video_ids:", self.video_ids)
         filtered_pre_result = self.filter_video_previewed(self.video_ids)
         if not filtered_pre_result:
             return None
@@ -496,6 +503,8 @@ class FilterVideos(object):
          })
         #2. 视频已曝光过滤
         st_viewed = time.time()
+        print("---filtered viewed---")
+        print("filtered_pre_result:",filtered_pre_result)
         filtered_viewed_result = self.filter_video_viewed(video_ids=filtered_pre_result)
         if not filtered_viewed_result:
             return None
@@ -505,7 +514,7 @@ class FilterVideos(object):
         flow_video_list = []
         normal_video_list = []
         for v_id in vid_list:
-            if vid_list in flow_vids_set:
+            if v_id in flow_vids_set:
                 flow_video_list.append(v_id)
             else:
                 normal_video_list.append(v_id)

+ 4 - 2
video_rank.py

@@ -235,9 +235,11 @@ def refactor_video_rank(rov_recall_rank, fast_flow_set, flow_set, size, top_K, f
     fast_flow_recall_rank = []
     flow_recall_rank = []
     for item in rov_recall_rank:
-        if item[0] in fast_flow_set:
+        vid = item.get('videoId', 0)
+        #print(item)
+        if vid in fast_flow_set:
             fast_flow_recall_rank.append(item)
-        elif item[0] in flow_set:
+        elif vid in flow_set:
             flow_recall_rank.append(item)
     # all flow result
     all_flow_recall_rank = fast_flow_recall_rank + flow_recall_rank

+ 13 - 12
video_recall.py

@@ -2125,9 +2125,9 @@ class PoolRecall(object):
         return recall_result
 
     # get region_hour_recall
-    def get_region_hour_recall(self, size=4, region_code='',):
+    def get_region_hour_recall(self, size=4, region_code='-1'):
         pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H
-        recall_key = f"{pool_key_prefix}:{region_code}:{self.data_key}:{self.rule_key}"
+        recall_key = f"{pool_key_prefix}{region_code}:{self.data_key}:{self.rule_key}"
         print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
         print(data)
@@ -2144,13 +2144,13 @@ class PoolRecall(object):
         return recall_result
 
     # get region_day_recall
-    def get_region_day_recall(self, size=4,region_code=''):
+    def get_region_day_recall(self, size=4,region_code='-1'):
         """召回池召回视频"""
         pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H
-        recall_key = f"{pool_key_prefix}:{region_code}:{self.data_key}:{self.rule_key}"
+        recall_key = f"{pool_key_prefix}{region_code}:{self.data_key}:{self.rule_key}"
         print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
-        print(data)
+        #print(data)
         recall_result = []
         if data is not None:
             json_result = json.loads(data)
@@ -2164,36 +2164,37 @@ class PoolRecall(object):
         return recall_result
 
 
-    def get_selected_recall(self, size=4, region_code=''):
+    def get_selected_recall(self, size=4, region_code='-1'):
         """召回池召回视频"""
         pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H
-        recall_key = f"{pool_key_prefix}:{self.data_key}:{self.rule_key}"
+        recall_key = f"{pool_key_prefix}{region_code}:{self.data_key}:{self.rule_key}"
         print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
-        print(data)
+        #print(data)
         recall_result = []
         if data is not None:
             json_result = json.loads(data)
-            print("json_result:", json_result)
+            #print("json_result:", json_result)
             for per_item in json_result:
                 recall_result.append(
                     {'videoId': per_item[0], 'flowPool': '',
                      'rovScore': per_item[1], 'pushFrom': config_.PUSH_FROM['rov_recall_24h'],
                      'abCode': self.ab_code}
                 )
+        #print("recall_result:", recall_result)
         return recall_result
 
-    def get_no_selected_recall(self, size=4, region_code=''):
+    def get_no_selected_recall(self, size=4, region_code='-1'):
         """未选择召回池召回视频"""
         pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H
-        recall_key = f"{pool_key_prefix}:{self.data_key}:{self.rule_key}"
+        recall_key = f"{pool_key_prefix}{region_code}:{self.data_key}:{self.rule_key}"
         print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
         print(data)
         recall_result = []
         if data is not None:
             json_result = json.loads(data)
-            print("json_result:", json_result)
+            #print("json_result:", json_result)
             for per_item in json_result:
                 recall_result.append(
                     {'videoId': per_item[0], 'flowPool': '',