|
@@ -942,16 +942,17 @@ def video_new_rank3_4density(data, size, top_K, flow_pool_P, rank_key_prefix='ra
|
|
|
rov_recall_rank, flow_recall_rank = remove_duplicate(
|
|
|
rov_recall=rov_recall_rank, flow_recall=flow_recall_rank, top_K=top_K
|
|
|
)
|
|
|
- # 2 多样性需求,给video添加tag todo
|
|
|
- video_ids = []
|
|
|
- video_ids.extend([v["videoId"] for v in rov_recall_rank])
|
|
|
- video_ids.extend([v["videoId"] for v in flow_recall_rank])
|
|
|
- video_ids = list(set(video_ids))
|
|
|
- video_tag_dict = get_video_tags(redis_helper, video_ids)
|
|
|
- for v in rov_recall_rank:
|
|
|
- v["tags"] = video_tag_dict.get(v["videoId"], [])
|
|
|
- for v in flow_recall_rank:
|
|
|
- v["tags"] = video_tag_dict.get(v["videoId"], [])
|
|
|
+ # 2 多样性需求,给video添加tag 无规则时 不需要取tag
|
|
|
+ if len(density_rules) != 0:
|
|
|
+ video_ids = []
|
|
|
+ video_ids.extend([v["videoId"] for v in rov_recall_rank])
|
|
|
+ video_ids.extend([v["videoId"] for v in flow_recall_rank])
|
|
|
+ video_ids = list(set(video_ids))
|
|
|
+ video_tag_dict = get_video_tags(redis_helper, video_ids)
|
|
|
+ for v in rov_recall_rank:
|
|
|
+ v["tags"] = video_tag_dict.get(v["videoId"], [])
|
|
|
+ for v in flow_recall_rank:
|
|
|
+ v["tags"] = video_tag_dict.get(v["videoId"], [])
|
|
|
|
|
|
rank_result = []
|
|
|
flow_pool_recall_process['recall_duplicate_res'] = {'rov_recall_rank': rov_recall_rank,
|
|
@@ -983,13 +984,15 @@ def video_new_rank3_4density(data, size, top_K, flow_pool_P, rank_key_prefix='ra
|
|
|
else:
|
|
|
rank_result.extend(rov_recall_rank[:size - top_K - i])
|
|
|
# todo zhangbo rank
|
|
|
+ rov_recall_rank_new = [i for i in rov_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
|
|
|
+ flow_recall_rank_new = [i for i in flow_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
|
|
|
result = merge_density_control(
|
|
|
rank_result[:size],
|
|
|
- [i for i in rov_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
|
|
|
- [i for i in flow_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
|
|
|
+ rov_recall_rank_new,
|
|
|
+ flow_recall_rank_new,
|
|
|
density_rules
|
|
|
)
|
|
|
- return result, flow_num, flow_pool_recall_process
|
|
|
+ return result[:size], flow_num, flow_pool_recall_process
|
|
|
else:
|
|
|
if rov_recall_rank:
|
|
|
rank_result.append(rov_recall_rank[0])
|
|
@@ -997,22 +1000,29 @@ def video_new_rank3_4density(data, size, top_K, flow_pool_P, rank_key_prefix='ra
|
|
|
else:
|
|
|
rank_result.extend(flow_recall_rank[:size - top_K - i])
|
|
|
# todo zhangbo rank
|
|
|
+ rov_recall_rank_new = [i for i in rov_recall_rank if
|
|
|
+ i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
|
|
|
+ flow_recall_rank_new = [i for i in flow_recall_rank if
|
|
|
+ i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
|
|
|
result = merge_density_control(
|
|
|
rank_result[:size],
|
|
|
- [i for i in rov_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
|
|
|
- [i for i in flow_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
|
|
|
+ rov_recall_rank_new,
|
|
|
+ flow_recall_rank_new,
|
|
|
density_rules
|
|
|
)
|
|
|
- return result, flow_num, flow_pool_recall_process
|
|
|
+ return result[:size], flow_num, flow_pool_recall_process
|
|
|
i += 1
|
|
|
# todo zhangbo rank
|
|
|
+ rov_recall_rank_new = [i for i in rov_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
|
|
|
+ flow_recall_rank_new = [i for i in flow_recall_rank if
|
|
|
+ i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
|
|
|
result = merge_density_control(
|
|
|
rank_result[:size],
|
|
|
- [i for i in rov_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
|
|
|
- [i for i in flow_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
|
|
|
+ rov_recall_rank_new,
|
|
|
+ flow_recall_rank_new,
|
|
|
density_rules
|
|
|
)
|
|
|
- return result, flow_num, flow_pool_recall_process
|
|
|
+ return result[:size], flow_num, flow_pool_recall_process
|
|
|
# 排序服务兜底
|
|
|
def sup_rank(video_scores, recall_list):
|
|
|
if video_scores and len(recall_list) > 0:
|
|
@@ -1434,6 +1444,8 @@ def video_sank_pos_rank(data, size, top_K, flow_pool_P, ab_Code='', exp_config=N
|
|
|
def merge_density_control(
|
|
|
data: list, rov: list, flow: list, rule: dict
|
|
|
) -> list:
|
|
|
+ if len(rule) == 0:
|
|
|
+ return data
|
|
|
# 1 判断是否满足规则
|
|
|
status_cur: Dict[str, int] = {}
|
|
|
for d in data:
|
|
@@ -1441,6 +1453,8 @@ def merge_density_control(
|
|
|
for t in d["tags"]:
|
|
|
if t in rule.keys():
|
|
|
status_cur[t] = 1 + status_cur[t] if t in status_cur.keys() else 1
|
|
|
+ if len(status_cur) == 0:
|
|
|
+ return data
|
|
|
status_cur_illegal: Dict[str, int] = {}
|
|
|
for k, v in status_cur.items():
|
|
|
if k in rule.keys() and rule[k] < v:
|
|
@@ -1514,7 +1528,8 @@ def get_video_tags(redis_helper, video_ids) -> dict:
|
|
|
if __name__ == '__main__':
|
|
|
data: list = [
|
|
|
{'videoId': 1, 'flowPool': '', 'tags': ['下午好','元旦','祝福']},
|
|
|
- {'videoId': 2, 'flowPool': '', 'tags': ['下午好','祝福']},
|
|
|
+ {'videoId': 2, 'flowPool': ''},
|
|
|
+ # {'videoId': 2, 'flowPool': '', 'tags': ['下午好','祝福']},
|
|
|
{'videoId': 3, 'flowPool': '', 'tags': ['早上好']},
|
|
|
{'videoId': 4, 'flowPool': 'flow', 'tags': ['下午好','元旦','祝福']},
|
|
|
]
|
|
@@ -1533,11 +1548,11 @@ if __name__ == '__main__':
|
|
|
{'videoId': 23, 'flowPool': 'flow', 'tags': []}
|
|
|
]
|
|
|
rule = {
|
|
|
- '下午好': 2,
|
|
|
- '早上好': 1,
|
|
|
- '祝福': 1
|
|
|
+ '祝福': 1,
|
|
|
+ "下午好": 1
|
|
|
}
|
|
|
result = merge_density_control(data, rov, flow, rule)
|
|
|
+ print(len(result))
|
|
|
print(result)
|
|
|
|
|
|
# d_test = [{'videoId': 10028734, 'rovScore': 99.977, 'pushFrom': 'recall_pool', 'abCode': 10000},
|