import random

from log import Log
from config import set_config

log_ = Log()
config_ = set_config()


def video_rank(data, size):
    """
    Rank the recalled videos for distribution.

    The first ``config_.K`` slots are taken from the head of the ROV pool
    (or from the head of the flow pool when the ROV pool is empty). The
    remaining ``size - config_.K`` slots are filled one at a time: with
    probability ``config_.P`` the next flow-pool video is taken, otherwise
    the next ROV-pool video. As soon as the selected pool is exhausted, the
    open slots are filled from the other pool and the result is returned,
    so the result may hold fewer than ``size`` videos.

    :param data: recalled videos per channel, type-dict
        {'rov_pool_recall': [], 'flow_pool_recall': []}
    :param size: number of videos requested
    :return: rank_result, a flat list of videos, or None when the caller
        should apply its fallback strategy (nothing was recalled, or both
        pools are empty once the top-K videos have been taken)
    """
    # Sort every recall channel by score, highest first. A video without a
    # 'rovScore' key is ranked as score 0 instead of breaking the comparison.
    rov_recall_rank = sorted(
        data['rov_pool_recall'],
        key=lambda k: k.get('rovScore', 0),
        reverse=True,
    )
    flow_recall_rank = sorted(
        data['flow_pool_recall'],
        key=lambda k: k.get('rovScore', 0),
        reverse=True,
    )

    # Drop videos that were recalled by both channels.
    rov_recall_rank, flow_recall_rank = remove_duplicate(
        rov_recall=rov_recall_rank, flow_recall=flow_recall_rank)

    # Take the top K, preferring the ROV pool.
    top_k = config_.K
    if rov_recall_rank:
        rank_result = rov_recall_rank[:top_k]
        rov_recall_rank = rov_recall_rank[top_k:]
    elif flow_recall_rank:
        rank_result = flow_recall_rank[:top_k]
        flow_recall_rank = flow_recall_rank[top_k:]
    else:
        # Fallback strategy: nothing was recalled at all.
        return None

    if not rov_recall_rank and not flow_recall_rank:
        # Fallback strategy: nothing is left to fill the remaining slots.
        return None

    # Fill the remaining size - K slots, choosing the pool by probability P
    # and taking videos from the chosen pool in score order.
    remaining = size - top_k
    while remaining > 0:
        # random.random() yields a float in [0, 1).
        if random.random() < config_.P:
            chosen, other = flow_recall_rank, rov_recall_rank
        else:
            chosen, other = rov_recall_rank, flow_recall_rank

        if not chosen:
            # The chosen pool is exhausted: fill every open slot from the
            # other pool. extend() keeps rank_result a flat list of videos.
            rank_result.extend(other[:remaining])
            return rank_result

        rank_result.append(chosen.pop(0))
        remaining -= 1

    return rank_result


def remove_duplicate(rov_recall, flow_recall):
    """
    Remove videos that appear in more than one recall channel.

    Rule: a duplicated video that sits in the top ``config_.K`` of the ROV
    pool is kept in the ROV pool and dropped from the flow pool; a
    duplicated video further down the ROV pool is dropped from the ROV pool
    and kept in the flow pool.

    Both lists are updated in place and also returned. Video ids must be
    hashable.

    :param rov_recall: ROV pool, already sorted
    :param flow_recall: flow pool, already sorted
    :return: (rov_recall, flow_recall) without cross-channel duplicates
    """
    # NOTE(review): the video id is read as item[0], while video_rank reads
    # the score with item.get('rovScore') - confirm the item schema with the
    # code that builds the recall lists.
    top_k = config_.K
    flow_video_ids = {item[0] for item in flow_recall}

    rov_head = rov_recall[:top_k]
    rov_tail = rov_recall[top_k:]

    # Ids the ROV top K wins, to be dropped from the flow pool.
    ids_kept_in_rov = {
        item[0] for item in rov_head if item[0] in flow_video_ids
    }

    # Build filtered lists instead of calling list.remove() while iterating,
    # which skips the element following each removal.
    rov_recall[:] = rov_head + [
        item for item in rov_tail if item[0] not in flow_video_ids
    ]
    flow_recall[:] = [
        item for item in flow_recall if item[0] not in ids_kept_in_rov
    ]
    return rov_recall, flow_recall