liqian 3 years ago
parent
commit
358f1da0e8
1 changed files with 83 additions and 0 deletions
  1. 83 0
      video_rank.py

+ 83 - 0
video_rank.py

@@ -0,0 +1,83 @@
+import random
+
+from log import Log
+from config import set_config
+
+log_ = Log()  # module-level instance of the project's Log class
+config_ = set_config()  # configuration object; its K and P attributes are read by the functions below
+
+
def video_rank(data, size):
    """
    Rank recalled videos for distribution.

    The two recall pools are each sorted by ``rovScore`` (descending) and
    de-duplicated against each other via ``remove_duplicate``.  The first
    ``config_.K`` results come from the ROV pool (or from the flow pool when
    the ROV pool is empty).  Each of the remaining ``size - config_.K`` slots
    is then filled from the flow pool with probability ``config_.P`` and from
    the ROV pool otherwise; when the chosen pool is exhausted, the remaining
    slots are filled from the other pool and ranking stops.

    :param data: recalled videos per channel, type-dict
        {'rov_pool_recall': [], 'flow_pool_recall': []}
    :param size: number of videos requested
    :return: rank_result, a flat list of video items (it can be shorter than
        ``size`` when the pools run out), or None when the fallback strategy
        should be used: both pools are empty, or nothing is left in either
        pool once the top K has been taken
    """
    def score(item):
        # A missing (or None) score ranks as 0 instead of raising TypeError
        # when None gets compared against a number during sorting.
        return item.get('rovScore') or 0

    # Sort every recall channel by score, highest first.
    rov_recall_rank = sorted(data['rov_pool_recall'], key=score, reverse=True)
    flow_recall_rank = sorted(data['flow_pool_recall'], key=score, reverse=True)
    # Drop videos that were recalled by both channels.
    rov_recall_rank, flow_recall_rank = remove_duplicate(rov_recall=rov_recall_rank, flow_recall=flow_recall_rank)

    # Take the top K from the ROV pool, or from the flow pool if ROV is empty.
    # NOTE(review): when size < config_.K the result still holds up to K
    # items; confirm whether callers expect it to be capped at size.
    top_k = config_.K
    if rov_recall_rank:
        rank_result = rov_recall_rank[:top_k]
        rov_recall_rank = rov_recall_rank[top_k:]
    elif flow_recall_rank:
        rank_result = flow_recall_rank[:top_k]
        flow_recall_rank = flow_recall_rank[top_k:]
    else:
        # Fallback strategy: nothing was recalled at all.
        return None
    if not rov_recall_rank and not flow_recall_rank:
        # Fallback strategy: nothing is left beyond the top K.
        # NOTE(review): this discards the top-K items collected above; kept
        # as-is because callers may rely on None to trigger their fallback.
        return None

    # Fill the remaining slots one by one: flow pool with probability P,
    # ROV pool otherwise, always taking the best remaining item of that pool.
    remaining = size - top_k
    while remaining > 0:
        # random.random() yields a float in [0, 1).
        if random.random() < config_.P:
            preferred, other = flow_recall_rank, rov_recall_rank
        else:
            preferred, other = rov_recall_rank, flow_recall_rank
        if not preferred:
            # The chosen pool is exhausted: top up from the other pool.
            # extend (not append) keeps rank_result a flat list of items.
            rank_result.extend(other[:remaining])
            break
        rank_result.append(preferred.pop(0))
        remaining -= 1
    return rank_result
+
+
def remove_duplicate(rov_recall, flow_recall):
    """
    De-duplicate videos that were recalled by more than one channel.

    De-duplication rule:
        a video in the top ``config_.K`` of the ROV recall pool is kept in
        the ROV pool and dropped from the flow pool; an ROV item beyond the
        top K whose id is also present in the flow pool is dropped from the
        ROV pool, so the flow pool copy is the one that is kept.

    Both lists are filtered in place and the same list objects are returned.

    :param rov_recall: ROV recall pool, already sorted
    :param flow_recall: flow pool, already sorted
    :return: (rov_recall, flow_recall) after de-duplication
    """
    # NOTE(review): the video id is read as item[0], while video_rank in this
    # file reads the score with item.get('rovScore'); confirm that the item
    # schema really supports both kinds of access.
    top_k = config_.K
    # Sets give O(1) membership tests; ids are assumed to be hashable.
    flow_video_ids = {item[0] for item in flow_recall}
    rov_top_video_ids = {item[0] for item in rov_recall[:top_k]}

    # Beyond the top K, the flow pool wins: drop those items from the ROV pool.
    # Only the tail is rewritten, so a top-K entry can never be removed here.
    rov_recall[top_k:] = [item for item in rov_recall[top_k:] if item[0] not in flow_video_ids]
    # Within the top K, the ROV pool wins: drop those videos from the flow pool.
    # Building a filtered list avoids removing from a list while iterating it,
    # which skips the element that follows each removed one.
    flow_recall[:] = [item for item in flow_recall if item[0] not in rov_top_video_ids]
    return rov_recall, flow_recall