|
@@ -719,10 +719,9 @@ def video_sanke_rank(data, size, top_K, flow_pool_P, ab_Code='', exp_config=None
|
|
|
:param flow_pool_P: size-top_K视频为流量池视频的概率 type-float
|
|
|
:return: rank_result
|
|
|
"""
|
|
|
- if not data['rov_pool_recall'] and not data['flow_pool_recall']:
|
|
|
- # and not data['u2i_recall'] and not data['u2i_recall'] \
|
|
|
- # and not data['w2v_recall'] and not data['w2v_recall'] \
|
|
|
- # and not data['sim_recall'] and not data['sim_recall']:
|
|
|
+ if not data['rov_pool_recall'] and not data['flow_pool_recall'] \
|
|
|
+ and len(data['u2i_recall'])==0 and len(data['w2v_recall'])==0 \
|
|
|
+ and len(data['sim_recall']) == 0 and len(data['u2u2i_recall']) == 0 :
|
|
|
return [], 0
|
|
|
# 地域分组小时级规则更新数据
|
|
|
recall_dict = {}
|
|
@@ -749,6 +748,7 @@ def video_sanke_rank(data, size, top_K, flow_pool_P, ab_Code='', exp_config=None
|
|
|
hot_recall = []
|
|
|
w2v_recall =[]
|
|
|
sim_recall = []
|
|
|
+ u2u2i_recall = []
|
|
|
if ab_Code==60058:
|
|
|
if len(data['u2i_recall'])>0:
|
|
|
hot_recall = sorted(data['u2i_recall'], key=lambda k: k.get('rovScore', 0), reverse=True)
|
|
@@ -763,6 +763,11 @@ def video_sanke_rank(data, size, top_K, flow_pool_P, ab_Code='', exp_config=None
|
|
|
recall_dict['sim_recall'] = data['sim_recall']
|
|
|
else:
|
|
|
recall_dict['sim_recall'] = sim_recall
|
|
|
+ elif ab_Code==60062:
|
|
|
+ if len(data['u2u2i_recall'])>0:
|
|
|
+ recall_dict['u2u2i_recall'] = data['u2u2i_recall']
|
|
|
+ else:
|
|
|
+ recall_dict['u2u2i_recall'] = u2u2i_recall
|
|
|
|
|
|
recall_list = [('rov_recall_region_h',1, 1),('rov_recall_region_h',0.5, 1),('rov_recall_region_24h',1,1),
|
|
|
('u2i_recall',0.5,1), ('w2v_recall',0.5,1),('rov_recall_24h',1,1), ('rov_recall_24h_dup',0.5,1)]
|
|
@@ -875,6 +880,191 @@ def video_sanke_rank(data, size, top_K, flow_pool_P, ab_Code='', exp_config=None
|
|
|
i += 1
|
|
|
return rank_result[:size], flow_num
|
|
|
|
|
|
# Rule-based recall sources; each is picked out of data['rov_pool_recall'] by its pushFrom tag.
_POS_RANK_RULE_RECALLS = (
    'rov_recall_region_h',
    'rov_recall_region_24h',
    'rov_recall_24h',
    'rov_recall_24h_dup',
)
# (ab_Code, recall name) pairs whose recall list is used in the order it was delivered.
_POS_RANK_PASSTHROUGH_EXPERIMENTS = (
    (60059, 'w2v_recall'),
    (60061, 'sim_recall'),
    (60062, 'u2u2i_recall'),
)
# Default (recall name, probability threshold) entries used for every one of the four positions.
_POS_RANK_DEFAULT_POS_CONFIG = (
    ('rov_recall_region_h', 0.98),
    ('rov_recall_24h', 0.2),
    ('rov_recall_region_24h', 1),
    ('rov_recall_24h', 1),
    ('rov_recall_24h_dup', 1),
)
_POS_RANK_POS_KEYS = ('recall_pos1', 'recall_pos2', 'recall_pos3', 'recall_pos4')
_POS_RANK_ROUNDS = 3    # number of passes over the four position configs
_POS_RANK_ROV_CAP = 12  # stop collecting once the ROV ranking holds more than this many videos
_POS_RANK_ROV_MIN = 4   # below this many videos the fallback pool is appended


def _pos_rank_rov_score(item):
    """Sort key: the item's 'rovScore', or 0 when the field is absent."""
    return item.get('rovScore', 0)


def _pos_rank_fill_by_probability(rank_result, rov_recall_rank, flow_recall_rank, remaining, flow_pool_P):
    """Append up to `remaining` videos to `rank_result`, drawing one video per step.

    Each step draws a random number in [0, 1): below `flow_pool_P` the next flow-pool
    video is taken, otherwise the next ROV video. As soon as the chosen pool is
    exhausted, the still-missing slots are filled from the other pool and the
    function returns. Neither input list is modified.
    """
    rov_pos = flow_pos = 0
    for taken in range(remaining):
        missing = remaining - taken
        if random.random() < flow_pool_P:
            if flow_pos < len(flow_recall_rank):
                rank_result.append(flow_recall_rank[flow_pos])
                flow_pos += 1
            else:
                rank_result.extend(rov_recall_rank[rov_pos:rov_pos + missing])
                return
        else:
            if rov_pos < len(rov_recall_rank):
                rank_result.append(rov_recall_rank[rov_pos])
                rov_pos += 1
            else:
                rank_result.extend(flow_recall_rank[flow_pos:flow_pos + missing])
                return


def video_sank_pos_rank(data, size, top_K, flow_pool_P, ab_Code='', exp_config=None):
    """
    Rank videos for distribution, choosing recall sources position by position.

    :param data: recalled videos per channel, type-dict, e.g.
                 {'rov_pool_recall': [], 'flow_pool_recall': [], 'u2i_recall': [], ...};
                 missing or empty channels are treated as empty lists
    :param size: number of videos requested
    :param top_K: number of leading slots taken from the ROV recall ranking
                  (from the flow pool when the ROV ranking is empty) type-int
    :param flow_pool_P: probability that each of the remaining size-top_K slots is
                        filled from the flow pool type-float
    :param ab_Code: experiment code; 60058/60059/60061/60062 make the
                    u2i/w2v/sim/u2u2i recall available to the position configs
    :param exp_config: optional dict; 'recall_pos1'..'recall_pos4' override the
                       per-position configs (only honoured when 'recall_list' is
                       truthy), 'flowConfig' == 1 enables the flow-pool counting branch
    :return: (rank_result, flow_num) - at most `size` videos, and the number of
             counted videos whose 'flowPool' field contains '#'
    """
    rov_pool = data.get('rov_pool_recall') or []
    flow_pool = data.get('flow_pool_recall') or []
    experiment_names = ('u2i_recall', 'w2v_recall', 'sim_recall', 'u2u2i_recall')
    if not rov_pool and not flow_pool and not any(data.get(name) for name in experiment_names):
        return [], 0

    # Split the ROV pool by rule (pushFrom tag) and sort every group by rovScore, best first.
    recall_dict = {}
    for recall_name in _POS_RANK_RULE_RECALLS:
        push_from = config_.PUSH_FROM[recall_name]
        recall_dict[recall_name] = sorted(
            (item for item in rov_pool if item.get('pushFrom') == push_from),
            key=_pos_rank_rov_score, reverse=True)

    # Experiment recalls: u2i is re-sorted by rovScore, the others keep their delivered order.
    u2i_recall = []
    if ab_Code == 60058:
        u2i_recall = sorted(data.get('u2i_recall') or [], key=_pos_rank_rov_score, reverse=True)
        recall_dict['u2i_recall'] = u2i_recall
    else:
        for code, recall_name in _POS_RANK_PASSTHROUGH_EXPERIMENTS:
            if ab_Code == code:
                recall_dict[recall_name] = data.get(recall_name) or []
                break

    # NOTE(review): the override is gated on 'recall_list' although it reads the
    # 'recall_posN' keys; the gate is kept for backward compatibility - confirm it is intended.
    overrides = exp_config if exp_config and exp_config.get('recall_list') else {}
    recall_list = [overrides.get(key, _POS_RANK_DEFAULT_POS_CONFIG) for key in _POS_RANK_POS_KEYS]

    rov_recall_rank = []
    select_ids = set()
    for _ in range(_POS_RANK_ROUNDS):
        if len(rov_recall_rank) > _POS_RANK_ROV_CAP:
            break
        for recall_pos_config in recall_list:
            # One random draw per position, compared against every entry of that position.
            rand_num = random.random()
            for per_recall_item in recall_pos_config:
                per_recall_name = per_recall_item[0]
                per_recall_freq = per_recall_item[1]
                if rand_num < per_recall_freq and per_recall_name in recall_dict:
                    # NOTE(review): nesting reconstructed from statement order - every
                    # eligible source contributes its best not-yet-selected video; confirm
                    # that a position is not meant to stop after the first contribution.
                    for recall_item in recall_dict[per_recall_name]:
                        vid = recall_item['videoId']
                        if vid in select_ids:
                            continue
                        rov_recall_rank.append(recall_item)
                        select_ids.add(vid)
                        break

    # NOTE(review): assumed to run once after the rounds above (indentation was not recoverable).
    if len(rov_recall_rank) < _POS_RANK_ROV_MIN:
        # NOTE(review): only the sorted u2i list takes part here; the sim/w2v/u2u2i
        # lists were always empty at this point, which is preserved - confirm intent.
        rov_doudi_rank = (recall_dict['rov_recall_region_h'] + u2i_recall
                          + recall_dict['rov_recall_region_24h']
                          + recall_dict['rov_recall_24h']
                          + recall_dict['rov_recall_24h_dup'])
        for recall_item in rov_doudi_rank:
            vid = recall_item['videoId']
            if vid in select_ids:
                continue
            rov_recall_rank.append(recall_item)
            select_ids.add(vid)
            if len(rov_recall_rank) > _POS_RANK_ROV_CAP:
                break

    # Flow pool, best rovScore first.
    flow_recall_rank = sorted(flow_pool, key=_pos_rank_rov_score, reverse=True)
    # De-duplicate across the two rankings.
    rov_recall_rank, flow_recall_rank = remove_duplicate(rov_recall=rov_recall_rank,
                                                         flow_recall=flow_recall_rank,
                                                         top_K=top_K)

    # The first top_K slots come from the ROV ranking, or from the flow pool if that is empty.
    rank_result = []
    if len(rov_recall_rank) > 0:
        rank_result.extend(rov_recall_rank[:top_K])
        rov_recall_rank = rov_recall_rank[top_K:]
    else:
        rank_result.extend(flow_recall_rank[:top_K])
        flow_recall_rank = flow_recall_rank[top_K:]

    flow_num = 0
    flow_config = (exp_config.get('flowConfig') or 0) if exp_config else 0
    if flow_config == 1 and len(rov_recall_rank) > 0:
        # NOTE(review): these videos stay in rov_recall_rank, so they can be appended a
        # second time just below; kept as in the sibling ranking code - confirm.
        rank_result.extend(rov_recall_rank[:top_K])
        for recall_item in rank_result:
            flow_recall_name = recall_item.get("flowPool", '')
            if flow_recall_name is not None and '#' in flow_recall_name:
                flow_num += 1
        if flow_num > 0:
            all_recall_rank = rov_recall_rank + flow_recall_rank
            rank_result.extend(all_recall_rank[:size - top_K])
            return rank_result[:size], flow_num

    # Fill the remaining size - top_K slots by probability flow_pool_P and score order.
    _pos_rank_fill_by_probability(rank_result, rov_recall_rank, flow_recall_rank,
                                  size - top_K, flow_pool_P)
    return rank_result[:size], flow_num
|
|
|
+
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|