Browse Source

update recommend

linfan 1 year ago
parent
commit
4a37d858bc
2 changed files with 40 additions and 80 deletions
  1. 2 20
      recommend.py
  2. 38 60
      video_rank.py

+ 2 - 20
recommend.py

@@ -420,26 +420,8 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
             }
     #if ab_code=="ab_new_test":
     print("data:", data)
-    #rank_result = video_new_rank2(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P))
-    # else:
-    #     rank_result = video_rank(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P))
-
-
-    # 老视频实验
-    # if ab_code in [config_.AB_CODE['old_video']]:
-    #     rank_result = video_rank_with_old_video(rank_result=rank_result, old_video_recall=recall_result_list[2],
-    #                                             size=size, top_K=top_K, old_video_index=old_video_index)
-
-    # end_rank = time.time()
-    # log_.info({
-    #     'logTimestamp': int(time.time() * 1000),
-    #     'request_id': request_id,
-    #     'mid': mid,
-    #     'uid': uid,
-    #     'operation': 'rank',
-    #     'rank_result': rank_result,
-    #     'executeTime': (time.time() - start_rank) * 1000
-    # })
+    rank_result = video_new_rank2(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P))
+    print(rank_result)
 
     result['rankResult'] = rank_result
     result['rankTime'] = (time.time() - start_rank) * 1000

+ 38 - 60
video_rank.py

@@ -592,7 +592,7 @@ def video_rank_with_old_video(rank_result, old_video_recall, size, top_K, old_vi
     return new_rank_result[:size]
 
 
-def video_new_rank2(data, fast_flow_set, flow_set, size, top_K, flow_pool_P):
+def video_new_rank2(data, size, top_K, flow_pool_P):
     """
         视频分发排序
         :param data: 各路召回的视频 type-dict {'rov_pool_recall': [], 'flow_pool_recall': []}
@@ -601,41 +601,37 @@ def video_new_rank2(data, fast_flow_set, flow_set, size, top_K, flow_pool_P):
         :param flow_pool_P: size-top_K视频为流量池视频的概率 type-float
         :return: rank_result
         """
-    add_flow_set = set('')
-    if not data or len(data) == 0:
-        return [], add_flow_set
+    if not data['rov_pool_recall'] and not data['flow_pool_recall']:
+        return []
 
     redisObj = RedisHelper()
     vidKeys = []
-    for vid in videoIds:
-        vidKeys.append("k_p:" + str(vid))
+    recall_list = []
+    for recalls in data['rov_pool_recall']:
+        if len(recalls)<=0:
+            continue
+        for recall_item in recalls:
+            vid = recall_item.get("videoId",0)
+            vidKeys.append("k_p:" + str(vid))
+            recall_list.append(recall_item)
     # print("vidKeys:", vidKeys)
     video_scores = redisObj.get_batch_key(vidKeys)
     # print(video_scores)
-    video_items = []
     for i in range(len(video_scores)):
-        try:
+         try:
             # print(video_scores[i])
             if video_scores[i] is None:
-                video_items.append((videoIds[i], 0.0))
+                recall_list[i]['sort_score']= 0.0
             else:
                 video_score_str = json.loads(video_scores[i])
-                # print("video_score_str:",video_score_str)
-                video_items.append((videoIds[i], video_score_str[0]))
+                recall_list[i]['sort_score'] = video_score_str
         except Exception:
-            video_items.append((videoIds[i], 0.0))
-    sort_items = sorted(video_items, key=lambda k: k[1], reverse=True)
-    # print("sort_items:", sort_items)
-    rov_recall_rank = sort_items
-    fast_flow_recall_rank = []
-    flow_recall_rank = []
-    for item in sort_items:
-        if item[0] in fast_flow_set:
-            fast_flow_recall_rank.append(item)
-        elif item[0] in flow_set:
-            flow_recall_rank.append(item)
-    # all flow result
-    all_flow_recall_rank = fast_flow_recall_rank + flow_recall_rank
+                recall_list[i]['sort_score'] = 0.0
+    #sort_items = sorted(video_items, key=lambda k: k[1], reverse=True)
+    rov_recall_rank =sorted(recall_list, key=lambda k: k.get('sort_score', 0), reverse=True)
+    flow_recall_rank = sorted(data['flow_pool_recall'], key=lambda k: k.get('rovScore', 0), reverse=True)
+    rov_recall_rank, flow_recall_rank = remove_duplicate(rov_recall=rov_recall_rank, flow_recall=flow_recall_rank,
+                                                     top_K=top_K)
     rank_result = []
     rank_set = set('')
 
@@ -644,48 +640,30 @@ def video_new_rank2(data, fast_flow_set, flow_set, size, top_K, flow_pool_P):
         rank_result.extend(rov_recall_rank[:top_K])
         rov_recall_rank = rov_recall_rank[top_K:]
     else:
-        rank_result.extend(all_flow_recall_rank[:top_K])
-        all_flow_recall_rank = all_flow_recall_rank[top_K:]
-
-    for rank_item in rank_result:
-        rank_set.add(rank_item[0])
-    # print("rank_result:", rank_result)
-    # 按概率 p 及score排序获取 size - k 个视频, 第4个位置按概率取流量池
+        rank_result.extend(flow_recall_rank[:top_K])
+        flow_recall_rank = flow_recall_rank[top_K:]
+        # 按概率 p 及score排序获取 size - k 个视频
     i = 0
-    left_quato = size - top_K
-    j = 0
-    jj = 0
-    while i < left_quato and (j < len(all_flow_recall_rank) or jj < len(rov_recall_rank)):
+    while i < size - top_K:
         # 随机生成[0, 1)浮点数
         rand = random.random()
         # log_.info('rand: {}'.format(rand))
         if rand < flow_pool_P:
-            for flow_item in all_flow_recall_rank:
-                j += 1
-                if flow_item[0] in rank_set:
-                    continue
-                else:
-                    rank_result.append(flow_item)
-                    rank_set.add(flow_item[0])
-                    add_flow_set.add(flow_item[0])
-                i += 1
-                if i >= left_quato:
-                    break
-
+            if flow_recall_rank:
+                rank_result.append(flow_recall_rank[0])
+                flow_recall_rank.remove(flow_recall_rank[0])
+            else:
+                rank_result.extend(rov_recall_rank[:size - top_K - i])
+                return rank_result[:size]
         else:
-            for recall_item in rov_recall_rank:
-                jj += 1
-                if recall_item[0] in rank_set:
-                    continue
-                else:
-                    rank_result.append(recall_item)
-                    rank_set.add(recall_item[0])
-                i += 1
-                if i >= left_quato:
-                    break
-    # print("rank_result:", rank_result)
-    # print("add_flow_set:", add_flow_set)
-    return rank_result[:size], add_flow_set
+            if rov_recall_rank:
+                rank_result.append(rov_recall_rank[0])
+                rov_recall_rank.remove(rov_recall_rank[0])
+            else:
+                rank_result.extend(flow_recall_rank[:size - top_K - i])
+                return rank_result[:size]
+        i += 1
+    return rank_result[:size]
 
 
 if __name__ == '__main__':