瀏覽代碼

多样性需求

zhangbo 1 年之前
父節點
當前提交
406dcee119
共有 4 個文件被更改,包括 46 次插入和 35 次删除
  1. 1 1
      config.py
  2. 7 8
      recommend.py
  3. 38 23
      video_rank.py
  4. 0 3
      video_recall.py

+ 1 - 1
config.py

@@ -199,7 +199,7 @@ class BaseConfig(object):
             # 趋势性召回子策略,ab实验号533,推荐服务内实验号60098
             'abtest_533': 60098,
             'abtest_536': 60099,
-            'abtest_537': 70000,
+            'abtest_537': 60100,
         },  # 地域分组小时级规则实验
 
         'rank_by_24h': {

+ 7 - 8
recommend.py

@@ -537,7 +537,7 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
     elif ab_code == 60068 or ab_code == 60070 or ab_code == 60080 or ab_code == 60081 or ab_code == 60082 \
             or ab_code == 60083 or ab_code == 60084 or ab_code == 60085 or ab_code == 60086 \
             or ab_code == 60092 or ab_code == 60093 or ab_code == 60094 or ab_code == 60095 or ab_code == 60096\
-            or ab_code == 60097 or ab_code == 60098 or ab_code == 60099 or ab_code == 70000:
+            or ab_code == 60097 or ab_code == 60098 or ab_code == 60099 or ab_code == 60100:
         t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
         t.append(gevent.spawn(pool_recall.get_return_video_reall, 'rv2:'))
     elif ab_code == 1000000:
@@ -629,7 +629,7 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
             or ab_code == 60080 or ab_code == 60081 or ab_code == 60082 or ab_code == 60083 or ab_code == 60084\
             or ab_code == 60085 or ab_code == 60086 \
             or ab_code == 60092 or ab_code == 60093 or ab_code == 60094 or ab_code == 60095 or ab_code == 60096\
-            or ab_code == 60097 or ab_code == 60098 or ab_code == 60099 or ab_code == 70000:
+            or ab_code == 60097 or ab_code == 60098 or ab_code == 60099 or ab_code == 60100:
         rov_pool_recall = []
         if len(recall_result_list)>=2:
             region_recall = recall_result_list[0]
@@ -666,8 +666,7 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
                         now_video_ids.add(video_id)
             if len(rov_pool_recall)>0:
                 recall_result_list[0] = rov_pool_recall
-    # merge新增的recall_strategy_trend_v1 60098
-    if ab_code == 60099:
+    if ab_code == 1000000:
         rov_pool_recall = []
         if len(recall_result_list) >= 2:
             region_recall = recall_result_list[0]
@@ -790,13 +789,13 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
         env_json = env_dict
     #4.
     # rank_result, flow_num = video_new_rank2(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P), ab_code=ab_code, mid=mid, exp_config=exp_config, env_dict=env_dict)
-    if ab_code == 60098 or ab_code == 60099 or ab_code == 70000:
+    if ab_code == 60098 or ab_code == 60099 or ab_code == 60100:
         rule_key_str = "TAGS_FILTER_RULE_V1_JSON"
         if ab_code == 60098:
             rule_key_str = "TAGS_FILTER_RULE_V1_JSON"
         elif ab_code == 60099:
             rule_key_str = "TAGS_FILTER_RULE_V2_JSON"
-        elif ab_code == 70000:
+        elif ab_code == 60100:
             rule_key_str = "TAGS_FILTER_RULE_V3_JSON"
 
         rank_result, flow_num, flow_pool_recall_process = video_new_rank3_4density(
@@ -2075,7 +2074,7 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
             or ab_code == 60083 or ab_code == 60084 or ab_code == 60085 or ab_code == 60086 \
             or ab_code == 60087 or ab_code == 60088 or ab_code == 60089 or ab_code == 60090 \
             or ab_code == 60091 or ab_code == 60092 or ab_code == 60093 or ab_code == 60094 or ab_code == 60095 \
-            or ab_code == 60096 or ab_code == 60097 or ab_code == 60098 or ab_code == 60099 or ab_code == 70000:
+            or ab_code == 60096 or ab_code == 60097 or ab_code == 60098 or ab_code == 60099 or ab_code == 60100:
         result, fea_info = video_old_recommend(request_id=request_id, mid=mid, uid=uid, app_type=app_type, size=size,
                                                top_K=top_K, flow_pool_P=flow_pool_P, algo_type='',
                                                client_info=client_info, ab_code=ab_code, expire_time=expire_time,
@@ -2203,7 +2202,7 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
             or ab_code == 60083 or ab_code == 60084 or ab_code == 60085 or ab_code == 60086 \
             or ab_code == 60087 or ab_code == 60088 or ab_code == 60089 or ab_code == 60090 \
             or ab_code == 60091 or ab_code == 60092 or ab_code == 60093 or ab_code == 60094 or ab_code == 60095 \
-            or ab_code == 60096 or ab_code == 60097 or ab_code == 60098 or ab_code == 60099 or ab_code == 70000:
+            or ab_code == 60096 or ab_code == 60097 or ab_code == 60098 or ab_code == 60099 or ab_code == 60100:
         result, fea_info = video_old_recommend(request_id=request_id, mid=mid, uid=uid, app_type=app_type, size=size,
                                                top_K=top_K, flow_pool_P=flow_pool_P, algo_type='',
                                                client_info=client_info, ab_code=ab_code, expire_time=expire_time,

+ 38 - 23
video_rank.py

@@ -942,16 +942,17 @@ def video_new_rank3_4density(data, size, top_K, flow_pool_P, rank_key_prefix='ra
     rov_recall_rank, flow_recall_rank = remove_duplicate(
         rov_recall=rov_recall_rank, flow_recall=flow_recall_rank, top_K=top_K
     )
-    # 2 多样性需求,给video添加tag todo
-    video_ids = []
-    video_ids.extend([v["videoId"] for v in rov_recall_rank])
-    video_ids.extend([v["videoId"] for v in flow_recall_rank])
-    video_ids = list(set(video_ids))
-    video_tag_dict = get_video_tags(redis_helper, video_ids)
-    for v in rov_recall_rank:
-        v["tags"] = video_tag_dict.get(v["videoId"], [])
-    for v in flow_recall_rank:
-        v["tags"] = video_tag_dict.get(v["videoId"], [])
+    # 2 多样性需求,给video添加tag 无规则时 不需要取tag
+    if len(density_rules) != 0:
+        video_ids = []
+        video_ids.extend([v["videoId"] for v in rov_recall_rank])
+        video_ids.extend([v["videoId"] for v in flow_recall_rank])
+        video_ids = list(set(video_ids))
+        video_tag_dict = get_video_tags(redis_helper, video_ids)
+        for v in rov_recall_rank:
+            v["tags"] = video_tag_dict.get(v["videoId"], [])
+        for v in flow_recall_rank:
+            v["tags"] = video_tag_dict.get(v["videoId"], [])
 
     rank_result = []
     flow_pool_recall_process['recall_duplicate_res'] = {'rov_recall_rank': rov_recall_rank,
@@ -983,13 +984,15 @@ def video_new_rank3_4density(data, size, top_K, flow_pool_P, rank_key_prefix='ra
             else:
                 rank_result.extend(rov_recall_rank[:size - top_K - i])
                 # todo zhangbo rank
+                rov_recall_rank_new = [i for i in rov_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
+                flow_recall_rank_new = [i for i in flow_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
                 result = merge_density_control(
                     rank_result[:size],
-                    [i for i in rov_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
-                    [i for i in flow_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
+                    rov_recall_rank_new,
+                    flow_recall_rank_new,
                     density_rules
                 )
-                return result, flow_num, flow_pool_recall_process
+                return result[:size], flow_num, flow_pool_recall_process
         else:
             if rov_recall_rank:
                 rank_result.append(rov_recall_rank[0])
@@ -997,22 +1000,29 @@ def video_new_rank3_4density(data, size, top_K, flow_pool_P, rank_key_prefix='ra
             else:
                 rank_result.extend(flow_recall_rank[:size - top_K - i])
                 # todo zhangbo rank
+                rov_recall_rank_new = [i for i in rov_recall_rank if
+                                       i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
+                flow_recall_rank_new = [i for i in flow_recall_rank if
+                                        i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
                 result = merge_density_control(
                     rank_result[:size],
-                    [i for i in rov_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
-                    [i for i in flow_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
+                    rov_recall_rank_new,
+                    flow_recall_rank_new,
                     density_rules
                 )
-                return result, flow_num, flow_pool_recall_process
+                return result[:size], flow_num, flow_pool_recall_process
         i += 1
     # todo zhangbo rank
+    rov_recall_rank_new = [i for i in rov_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
+    flow_recall_rank_new = [i for i in flow_recall_rank if
+                            i["videoId"] not in [j["videoId"] for j in rank_result[:size]]]
     result = merge_density_control(
         rank_result[:size],
-        [i for i in rov_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
-        [i for i in flow_recall_rank if i["videoId"] not in [j["videoId"] for j in rank_result[:size]]],
+        rov_recall_rank_new,
+        flow_recall_rank_new,
         density_rules
     )
-    return result, flow_num, flow_pool_recall_process
+    return result[:size], flow_num, flow_pool_recall_process
 # 排序服务兜底
 def sup_rank(video_scores, recall_list):
     if video_scores and len(recall_list) > 0:
@@ -1434,6 +1444,8 @@ def video_sank_pos_rank(data, size, top_K, flow_pool_P, ab_Code='', exp_config=N
 def merge_density_control(
         data: list, rov: list, flow: list, rule: dict
 ) -> list:
+    if len(rule) == 0:
+        return data
     # 1 判断是否满足规则
     status_cur: Dict[str, int] = {}
     for d in data:
@@ -1441,6 +1453,8 @@ def merge_density_control(
             for t in d["tags"]:
                 if t in rule.keys():
                     status_cur[t] = 1 + status_cur[t] if t in status_cur.keys() else 1
+    if len(status_cur) == 0:
+        return data
     status_cur_illegal: Dict[str, int] = {}
     for k, v in status_cur.items():
         if k in rule.keys() and rule[k] < v:
@@ -1514,7 +1528,8 @@ def get_video_tags(redis_helper, video_ids) -> dict:
 if __name__ == '__main__':
     data: list = [
         {'videoId': 1, 'flowPool': '', 'tags': ['下午好','元旦','祝福']},
-        {'videoId': 2, 'flowPool': '', 'tags': ['下午好','祝福']},
+        {'videoId': 2, 'flowPool': ''},
+        # {'videoId': 2, 'flowPool': '', 'tags': ['下午好','祝福']},
         {'videoId': 3, 'flowPool': '', 'tags': ['早上好']},
         {'videoId': 4, 'flowPool': 'flow', 'tags': ['下午好','元旦','祝福']},
     ]
@@ -1533,11 +1548,11 @@ if __name__ == '__main__':
         {'videoId': 23, 'flowPool': 'flow', 'tags': []}
     ]
     rule = {
-        '下午好': 2,
-        '早上好': 1,
-        '祝福': 1
+        '祝福': 1,
+        "下午好": 1
     }
     result = merge_density_control(data, rov, flow, rule)
+    print(len(result))
     print(result)
 
     # d_test = [{'videoId': 10028734, 'rovScore': 99.977, 'pushFrom': 'recall_pool', 'abCode': 10000},

+ 0 - 3
video_recall.py

@@ -2071,9 +2071,6 @@ class PoolRecall(object):
         # if recall_num<size:
         #     recall_num = size
 
-        # todo zhangbo rank 放开地域的截断,给后续排序更多供给 做规则调控
-        if self.ab_code == 60098:
-            recall_num = size
 
         for region_result in region_recall_result_list:
             for video in region_result: