浏览代码

Merge branch 'master_20230524' into test

linfan 1 年之前
父节点
当前提交
66a94976d5
共有 4 个文件被更改,包括 118 次插入62 次删除
  1. 6 1
      config.py
  2. 58 44
      recommend.py
  3. 7 4
      video_rank.py
  4. 47 13
      video_recall.py

+ 6 - 1
config.py

@@ -153,6 +153,7 @@ class BaseConfig(object):
             'abtest_328': 60054,
             'abtest_332': 60055,
             'abtest_333': 60056,
+            'abtest_334': 60057,
         },  # 地域分组小时级规则实验
 
         'rank_by_24h': {
@@ -364,6 +365,10 @@ class BaseConfig(object):
         '333': {
             'data_key': 'data10', 'rule_key': 'rule7',
             'ab_code': AB_CODE['region_rank_by_h'].get('abtest_333')
+        },
+        '334': {
+            'data_key': 'data10', 'rule_key': 'rule7',
+            'ab_code': AB_CODE['region_rank_by_h'].get('abtest_334')
         }
     }
 
@@ -443,7 +448,7 @@ class BaseConfig(object):
         'fast_flow_recall': 'fast_flow_recall', #快速流量池召回
         'normal_flow_recall': 'normal_flow_recall',  # 普通流量池召回
         'hot_3_day_recall': 'hot_3_day_recall',  # 3天召回
-        'hot_7_day_recall': 'hot_7_day_recall',  # 3天召回
+        'hot_recall': 'hot_recall',  # hot召回
     }
 
     # category id mapping

+ 58 - 44
recommend.py

@@ -364,6 +364,8 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     # ####### 多进程召回
     start_recall = time.time()
     # log_.info('====== recall')
+
+    #print("abcode",ab_code)
     recall_result_list = []
     pool_recall = PoolRecall(request_id=request_id,
                              app_type=app_type, mid=mid, uid=uid, ab_code=ab_code,
@@ -371,67 +373,70 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
                              params=params, rule_key_30day=rule_key_30day, shield_config=shield_config, video_id= video_id)
 
     if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
-        t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time),
-             pool_recall.get_sim_hot_item_reall_filter]
+        t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time)]
+        if ab_code ==60054:
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
         if ab_code == 60055:
             t.append(gevent.spawn(pool_recall.get_3days_hot_item_reall))
         if ab_code == 60056:
-            t.append(gevent.spawn(pool_recall.get_7days_hot_item_reall))
+            t.append(gevent.spawn(pool_recall.get_hot_item_reall))
     else:
         t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time),
              gevent.spawn(pool_recall.flow_pool_recall, size, config_.QUICK_FLOW_POOL_ID),
-             gevent.spawn(pool_recall.flow_pool_recall, size),
-             gevent.spawn(pool_recall.get_sim_hot_item_reall_filter)]
+             gevent.spawn(pool_recall.flow_pool_recall, size)]
+        if  ab_code == 60054:
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
         if ab_code == 60055:
             t.append(gevent.spawn(pool_recall.get_3days_hot_item_reall))
         if ab_code == 60056:
-            t.append(gevent.spawn(pool_recall.get_7days_hot_item_reall))
+            t.append(gevent.spawn(pool_recall.get_hot_item_reall))
 
     gevent.joinall(t)
     recall_result_list = [i.get() for i in t]
 
-    #print(recall_result_list)
+    #print("recall:",recall_result_list)
     if len(recall_result_list)<0:
         result['recallResult']= []
         result['rankResult'] = []
         return result
-    #merge simrecall
-    rov_pool_recall = []
-    if len(recall_result_list) >= 2:
-        region_recall = recall_result_list[0]
-        sim_recall = []
-        if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
-            sim_recall = recall_result_list[1]
-        else:
-            if len(recall_result_list)>=4:
-                sim_recall = recall_result_list[3]
-        now_video_ids = set('')
-        if len(region_recall) > 0:
-            for video in region_recall:
-                video_id = video.get('videoId')
-                if video_id not in now_video_ids:
-                    rov_pool_recall.append(video)
-                    now_video_ids.add(video_id)
-        if len(sim_recall) > 0:
-            for video in sim_recall:
-                video_id = video.get('videoId')
-                # print("sim video_id:", video_id)
-                if video_id not in now_video_ids:
-                    rov_pool_recall.append(video)
-                    now_video_ids.add(video_id)
-        if len(rov_pool_recall) > 0:
-            recall_result_list[0] = rov_pool_recall
+    #1. merge simrecall
+    if ab_code == 60054:
+        rov_pool_recall = []
+        if len(recall_result_list) >= 2:
+            region_recall = recall_result_list[0]
+            sim_recall = []
+            if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
+                sim_recall = recall_result_list[1]
+            else:
+                if len(recall_result_list)>=4:
+                    sim_recall = recall_result_list[3]
+            now_video_ids = set('')
+            if len(region_recall) > 0:
+                for video in region_recall:
+                    video_id = video.get('videoId')
+                    if video_id not in now_video_ids:
+                        rov_pool_recall.append(video)
+                        now_video_ids.add(video_id)
+            if len(sim_recall) > 0:
+                for video in sim_recall:
+                    video_id = video.get('videoId')
+                    # print("sim video_id:", video_id)
+                    if video_id not in now_video_ids:
+                        rov_pool_recall.append(video)
+                        now_video_ids.add(video_id)
+            if len(rov_pool_recall) > 0:
+                recall_result_list[0] = rov_pool_recall
     # merge hot 3day recall
     if ab_code == 60055:
         rov_pool_recall = []
-        if len(recall_result_list)>=3:
+        if len(recall_result_list)>=2:
             region_recall = recall_result_list[0]
             hot_3_day_recall = []
             if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
                 hot_3_day_recall = recall_result_list[1]
             else:
-                if len(recall_result_list)>=5:
-                    hot_3_day_recall = recall_result_list[4]
+                if len(recall_result_list)>=4:
+                    hot_3_day_recall = recall_result_list[3]
             #print("sim:",sim_recall)
             now_video_ids = set('')
             if len(region_recall)>0:
@@ -449,17 +454,17 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
                         now_video_ids.add(video_id)
             if len(rov_pool_recall)>0:
                 recall_result_list[0] = rov_pool_recall
-    # merge hot 7day recall
+    # merge old video (hot) recall
     if ab_code == 60056:
         rov_pool_recall = []
-        if len(recall_result_list)>=4:
+        if len(recall_result_list)>=2:
             region_recall = recall_result_list[0]
             hot_7_day_recall = []
             if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
                 hot_7_day_recall = recall_result_list[1]
             else:
-                if len(recall_result_list) >= 5:
-                    hot_7_day_recall = recall_result_list[4]
+                if len(recall_result_list) >= 3:
+                    hot_7_day_recall = recall_result_list[3]
             now_video_ids = set('')
             if len(region_recall)>0:
                 for video in region_recall:
@@ -1523,6 +1528,7 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
     # 简单召回 - 排序 - 兜底
     get_result_st = time.time()
     #print("ab_code:", ab_code)
+    #new pipeline
     if ab_code == 60047 or ab_code == 60048 or ab_code == 60049:
         result = new_video_recommend(request_id=request_id,
                              mid=mid, uid=uid, app_type=app_type,
@@ -1532,6 +1538,7 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
                              rule_key=rule_key, data_key=data_key,
                              no_op_flag=no_op_flag, old_video_index=old_video_index,
                              params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
+    # old base (test4, test5)
     elif ab_code == 60050 or ab_code == 60051:
         result = video_recommend(request_id=request_id,
                                  mid=mid, uid=uid, app_type=app_type,
@@ -1541,7 +1548,11 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
                                  rule_key=rule_key, data_key=data_key,
                                  no_op_flag=no_op_flag, old_video_index=old_video_index,
                                  params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
-    elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 or ab_code == 60056:
+    # 60052,60053,60057(test7, test8, test6)
+    # sim recall: 60054
+    # 3 days recall: 60055, test1 (the former 7 days recall is now the old video recall below)
+    # old video: 60056, test2
+    elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 or ab_code == 60056 or ab_code==60057:
         result = video_old_recommend(request_id=request_id,
                                      mid=mid, uid=uid, app_type=app_type,
                                      size=size, top_K=top_K, flow_pool_P=flow_pool_P,
@@ -1583,7 +1594,8 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
         update_flow_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
     elif ab_code == 60050 or  ab_code == 60051:
         update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
-    elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 or ab_code == 60056:
+    elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 \
+            or ab_code == 60056 or ab_code==60057:
         update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
     else:
         update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
@@ -1667,7 +1679,8 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
                                  rule_key=rule_key, data_key=data_key, no_op_flag=no_op_flag,
                                  old_video_index=old_video_index, video_id=video_id,
                                  params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
-    elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 or ab_code == 60056:
+    elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 or \
+            ab_code == 60056 or ab_code==60057:
         result = video_old_recommend(request_id=request_id,
                                  mid=mid, uid=uid, app_type=app_type,
                                  size=size, top_K=top_K, flow_pool_P=flow_pool_P,
@@ -1710,7 +1723,8 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
          update_flow_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
     elif ab_code == 60050 or ab_code == 60051:
          update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
-    elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 or ab_code == 60056:
+    elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 or \
+            ab_code == 60056 or ab_code==60057:
          update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
     else:
          update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)

+ 7 - 4
video_rank.py

@@ -608,12 +608,14 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code):
     vidKeys = []
     recall_list = []
     pre_str = "k_p2:"
-    if ab_code == 60052 or ab_code == 60055:
+    if ab_code == 60052 or ab_code == 60053  or ab_code==60057:
+        pre_str = "k_p2:"
+    elif ab_code == 60054:
         pre_str = "k_p3:"
-    elif ab_code == 60053 or ab_code == 60056:
+    elif ab_code == 60055:
         pre_str = "k_p4:"
-    elif ab_code ==60054:
-        pre_str = "k_p2:"
+    elif ab_code == 60056:
+        pre_str = "k_p5:"
     #print("pre_str:", pre_str)
     for recall_item in data['rov_pool_recall']:
         if len(recall_item)<=0:
@@ -637,6 +639,7 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code):
                 recall_list[i]['sort_score'] = 0.0
     #sort_items = sorted(video_items, key=lambda k: k[1], reverse=True)
     rov_recall_rank =sorted(recall_list, key=lambda k: k.get('sort_score', 0), reverse=True)
+    #print(rov_recall_rank)
     flow_recall_rank = sorted(data['flow_pool_recall'], key=lambda k: k.get('rovScore', 0), reverse=True)
     rov_recall_rank, flow_recall_rank = remove_duplicate(rov_recall=rov_recall_rank, flow_recall=flow_recall_rank,
                                                      top_K=top_K)

+ 47 - 13
video_recall.py

@@ -2136,10 +2136,10 @@ class PoolRecall(object):
         if self.video_id is None:
             return  []
         recall_key = "sim_hot_" + str(self.video_id)
-        # print("recall_key:", recall_key)
+        print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
 
-        # print(data)
+        print(data)
         recall_result = []
         recall_dict  = {}
         video_ids = []
@@ -2164,11 +2164,11 @@ class PoolRecall(object):
         filtered_viewed_videos = filter_.filter_videos(pool_type='rov')
         if filtered_viewed_videos is None:
             return recall_result
-        #print("filtered_viewed_videos:", filtered_viewed_videos)
+        print("filtered_viewed_videos:", filtered_viewed_videos)
         for vid in filtered_viewed_videos:
             if vid in recall_dict:
                 recall_result.append(recall_dict[vid])
-        return recall_result
+        return recall_result[:30]
     # get region_hour_recall
     def get_region_hour_recall(self, size=4, region_code='-1'):
         pool_key_prefix = config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H
@@ -2186,7 +2186,7 @@ class PoolRecall(object):
                      'rovScore': per_item[1], 'pushFrom': config_.PUSH_FROM['rov_recall_region_h'],
                      'abCode': self.ab_code}
                 )
-        return recall_result[:200]
+        return recall_result[:30]
 
     # get region_day_recall
     def get_region_day_recall(self, size=4,region_code='-1'):
@@ -2373,8 +2373,6 @@ class PoolRecall(object):
         return flow_pool_recall_result[:size]
 
     def get_3days_hot_item_reall(self):
-        if self.video_id is None:
-            return  []
         recall_key = "hot_3day:"
         #print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
@@ -2385,7 +2383,7 @@ class PoolRecall(object):
         video_ids = []
         if data is not None:
             json_result = json.loads(data)
-            print("json_result:", json_result)
+            #print("json_result:", json_result)
             for per_item in json_result:
                 try:
                     vid = int(per_item[0])
@@ -2413,10 +2411,8 @@ class PoolRecall(object):
         #print("recall_dict:", recall_dict)
         return recall_result
 
-    def get_7days_hot_item_reall(self):
-        if self.video_id is None:
-            return  []
-        recall_key = "hot_7day:"
+    def get_hot_item_reall(self):
+        recall_key = "hot_video:"
         #print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
 
@@ -2432,7 +2428,7 @@ class PoolRecall(object):
                     vid = int(per_item[0])
                     video_ids.append(vid)
                     recall_dict[vid] = {'videoId': vid, 'flowPool': '',
-                         'rovScore': per_item[1], 'pushFrom': config_.PUSH_FROM['hot_7_day_recall'],
+                         'rovScore': per_item[1], 'pushFrom': config_.PUSH_FROM['hot_recall'],
                          'abCode': self.ab_code}
                 except Exception as e:
                     continue
@@ -2450,4 +2446,42 @@ class PoolRecall(object):
             if vid in recall_dict:
                 recall_result.append(recall_dict[vid])
         #print("hot recall_result:", recall_result)
+        return recall_result
+
+    def get_title_recall(self):
+        if self.video_id is None:
+            return []
+        recall_key = "title_I2I:" + str(self.video_id)
+        # print("recall_key:", recall_key)
+        data = self.redis_helper.get_data_from_redis(key_name=recall_key)
+
+        # print(data)
+        recall_result = []
+        recall_dict = {}
+        video_ids = []
+        if data is not None:
+            json_result = json.loads(data)
+            # print("json_result:", json_result)
+            for per_item in json_result:
+                try:
+                    vid = int(per_item[0])
+                    video_ids.append(vid)
+                    recall_dict[vid] = {'videoId': vid, 'flowPool': '',
+                                        'rovScore': per_item[1], 'pushFrom': config_.PUSH_FROM['title_i2i_recall'],
+                                        'abCode': self.ab_code}
+                except Exception as e:
+                    continue
+        if len(video_ids) <= 0:
+            return recall_result
+        video_ids = video_ids[:50]
+        # print(video_ids)
+        filter_ = FilterVideos(request_id=self.request_id,
+                               app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids)
+        filtered_viewed_videos = filter_.filter_videos(pool_type='rov')
+        if filtered_viewed_videos is None:
+            return recall_result
+        # print("filtered_viewed_videos:", filtered_viewed_videos)
+        for vid in filtered_viewed_videos:
+            if vid in recall_dict:
+                recall_result.append(recall_dict[vid])
         return recall_result