瀏覽代碼

Merge branch 'deepfm_20230725' of algorithm/rov-server into master

linfan 1 年之前
父節點
當前提交
744973dab2
共有 3 個文件被更改,包括 80 次插入91 次删除
  1. 36 39
      rank_service.py
  2. 27 25
      recommend.py
  3. 17 27
      video_rank.py

+ 36 - 39
rank_service.py

@@ -9,8 +9,6 @@ log_ = Log()
 
 config_ = set_config()
 def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
-    recall_list = []
-    vidKeys = []
     feature_dict = {}
     # defult value
     apptype = 4
@@ -21,6 +19,8 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     machineinfo_platform = 'android'
     sencetype = 100078
     machineinfo_model ='M2006C3LC'
+    city_code = -1
+    province_code = -1
     if env_dict and len(env_dict)>0:
         apptype = env_dict.get('app_type',4)
         pagesource = env_dict.get('pagesource', '')
@@ -30,10 +30,8 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
         recommendsource = env_dict.get('recommendsource', '0')
         machineinfo_platform = env_dict.get('machineinfo_platform', '')
         sencetype = env_dict.get('sencetype', '')
-    redisObj = RedisHelper()
-    pre_str = "v_ctr:"
-    hour_pre_str = "v_hour_ctr:"
-    hour_vidKeys = []
+        city_code = env_dict.get('city_code', -1)
+        province_code = env_dict.get('province_code', -1)
     mid_list = []
     videoid_list = []
     apptype_list = []
@@ -41,7 +39,6 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     versioncode_list = []
     machineinfo_brand_list = []
     machineinfo_model_list = []
-    recommendsource_list = []
     machineinfo_platform_list = []
     sencetype_list = []
     day_rov_list = []
@@ -56,33 +53,31 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     day_view_pv_list = []
     day_view_users_list = []
     day_share_users = []
-    for recall_item in data['rov_pool_recall']:
-        if len(recall_item)<=0:
-            continue
-        vid = recall_item.get("videoId",0)
-        mid_list.append(mid)
-        videoid_list.append(int(vid))
-        apptype_list.append(apptype)
-        pagesource_list.append(pagesource)
-        versioncode_list.append(versioncode)
-        machineinfo_brand_list.append(machineinfo_brand)
-        machineinfo_model_list.append(machineinfo_model)
-        recommendsource_list.append(recommendsource)
-        machineinfo_platform_list.append(machineinfo_platform)
-        sencetype_list.append(sencetype)
-        vidKeys.append(pre_str + str(vid))
-        hour_vidKeys.append(hour_pre_str+str(vid))
-        recall_list.append(recall_item)
-    #print("vidKeys:", vidKeys)
-    video_static_info = redisObj.get_batch_key(vidKeys)
-    video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
+    recommendsource_list = []
+    recall_list = env_dict.get('recall_list', [])
+    city_list = []
+    province_list = []
+    if recall_list and len(recall_list)>0:
+        for i in range(len(recall_list)):
+            mid_list.append(mid)
+            videoid_list.append(int(recall_list[i]))
+            apptype_list.append(apptype)
+            pagesource_list.append(pagesource)
+            versioncode_list.append(versioncode)
+            machineinfo_brand_list.append(machineinfo_brand)
+            machineinfo_model_list.append(machineinfo_model)
+            recommendsource_list.append(recommendsource)
+            machineinfo_platform_list.append(machineinfo_platform)
+            sencetype_list.append(sencetype)
+            city_list.append(int(city_code))
+            province_list.append(int(province_code))
+    video_static_info = env_dict.get('vid_day_fea_list', [])
+    video_hour_static_info = env_dict.get('vid_hour_fea_list', [])
     #print("video_static_info:",video_static_info)
-    if video_static_info:
+    if video_static_info and len(video_static_info)>0:
         for i in range(len(video_static_info)):
             try:
-                # print(video_scores[i])
-                vid = vidKeys[i].replace(pre_str,"")
-                if video_static_info[i] :
+                if video_static_info[i]  and len(video_static_info[i])>0:
                     per_video_staic = json.loads(video_static_info[i])
                     day_rov_list.append(float(per_video_staic[0]))
                     day_share_return_score_list.append(float(per_video_staic[1]))
@@ -118,12 +113,10 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     hour_return_rate_list = []
     hour_ctr_score_list = []
 
-    if video_hour_static_info:
+    if video_hour_static_info and len(video_hour_static_info)>0:
         for i in range(len(video_hour_static_info)):
             try:
-                # print(video_scores[i])
-                vid = hour_vidKeys[i].replace(hour_pre_str,"")
-                if video_hour_static_info[i] :
+                if video_hour_static_info[i] and len(video_hour_static_info[i])>0:
                     per_hour_video_staic = json.loads(video_hour_static_info[i])
                     hour_rov_list.append(float(per_hour_video_staic[0]))
                     hour_share_return_score_list.append(float(per_hour_video_staic[1]))
@@ -177,7 +170,9 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     feature_dict["hour_share_score_list"] = hour_share_score_list
     feature_dict["hour_return_rate_list"] = hour_return_rate_list
     feature_dict["hour_ctr_score_list"] = hour_ctr_score_list
-    return feature_dict, recall_list
+    feature_dict["city_code"] = city_list
+    feature_dict["province_code"] = province_list
+    return feature_dict
 
 
 def insert_static_default_fea(day_ctr_score_list, day_play_pv_list, day_play_users_list, day_return_rate_list,
@@ -224,9 +219,11 @@ def get_tf_serving_sores(feature_dict):
                     # "day_view_users":feature_dict["day_view_users_list"],
                     "hour_rov": feature_dict["hour_rov_list"],
                     "hour_share_score": feature_dict["hour_share_score_list"],
-                    "hour_share_return_score": feature_dict["hour_share_return_score_list"],
-                    "hour_return_rate": feature_dict["hour_return_rate_list"],
-                    "hour_ctr_score": feature_dict["hour_ctr_score_list"]
+                    #"hour_share_return_score": feature_dict["hour_share_return_score_list"],
+                    #"hour_return_rate": feature_dict["hour_return_rate_list"],
+                    #"hour_ctr_score": feature_dict["hour_ctr_score_list"],
+                    "city_code": feature_dict['city_code'],
+                    "province_code": feature_dict['province_code']
     }
     request_data_dict= {}
     request_data_dict["inputs"] = inputs_data

+ 27 - 25
recommend.py

@@ -665,40 +665,42 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
                 'flow_pool_recall': recall_result_list[2]
             }
     # 3. 特征回流
-    #print(env_dict)
+    rec_recall_list = []
+    vidKeys = []
+    hour_vidKeys = []
+    pre_str = "v_ctr:"
+    pre_hour_str = "v_hour_ctr:"
+    rec_recall_item_list = []
+    for recall_item in data['rov_pool_recall']:
+        if len(recall_item) <= 0:
+            continue
+        vid = recall_item.get("videoId", 0)
+        rec_recall_list.append(vid)
+        vidKeys.append(pre_str + str(vid))
+        hour_vidKeys.append(pre_hour_str + str(vid))
+        rec_recall_item_list.append(recall_item)
+    redisObj = RedisHelper()
+    video_static_info = redisObj.get_batch_key(vidKeys)
+    video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
+    vid_day_fea_list = []
+    vid_hour_fea_list = []
+    if video_static_info:
+        vid_day_fea_list = video_static_info
+    if video_hour_static_info:
+        vid_hour_fea_list = video_hour_static_info
     if env_dict:
-        province_code = client_info.get('provinceCode', '-1')
-        city_code = client_info.get('cityCode', '-1')
+        province_code = client_info.get('provinceCode', -1)
+        city_code = client_info.get('cityCode', -1)
         env_dict['mid'] = mid
         env_dict['province_code'] = province_code
         env_dict['city_code'] = city_code
-        rec_recall_list = []
-        vidKeys = []
-        hour_vidKeys = []
-        pre_str = "v_ctr:"
-        pre_hour_str = "v_hour_ctr:"
-        for recall_item in data['rov_pool_recall']:
-            if len(recall_item) <= 0:
-                continue
-            vid = recall_item.get("videoId", 0)
-            rec_recall_list.append(vid)
-            vidKeys.append(pre_str + str(vid))
-            hour_vidKeys.append(pre_hour_str + str(vid))
-        redisObj = RedisHelper()
-        video_static_info = redisObj.get_batch_key(vidKeys)
-        video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
-        vid_day_fea_list = []
-        vid_hour_fea_list = []
-        if video_static_info:
-            vid_day_fea_list = video_static_info
-        if video_hour_static_info:
-            vid_hour_fea_list = video_hour_static_info
+
         env_dict['recall_list'] = rec_recall_list
         env_dict['vid_day_fea_list'] = vid_day_fea_list
         env_dict['vid_hour_fea_list'] = vid_hour_fea_list
         env_json = env_dict
     #4.
-    rank_result, flow_num  = video_new_rank2(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P), ab_code=ab_code, mid=mid, exp_config=exp_config, env_dict=env_dict)
+    rank_result, flow_num  = video_new_rank2(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P), ab_code=ab_code, mid=mid, exp_config=exp_config, env_dict=env_dict, rec_recall_item_list=rec_recall_item_list)
     #print(rank_result)
     if rank_result:
         result['rank_num'] = len(rank_result)

+ 17 - 27
video_rank.py

@@ -593,7 +593,7 @@ def video_rank_with_old_video(rank_result, old_video_recall, size, top_K, old_vi
     return new_rank_result[:size]
 
 
-def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=None, env_dict=None):
+def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=None, env_dict=None, rec_recall_item_list=None):
     """
         视频分发排序
         :param data: 各路召回的视频 type-dict {'rov_pool_recall': [], 'flow_pool_recall': []}
@@ -602,10 +602,9 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=Non
         :param flow_pool_P: size-top_K视频为流量池视频的概率 type-float
         :return: rank_result
         """
-    if not data['rov_pool_recall'] and not data['flow_pool_recall']:
+    if not rec_recall_item_list and not data['flow_pool_recall']:
         return [], 0
 
-
     #全量的是vlog,票圈精选, 334,60057,
     # 60054: simrecall,
     # 60052: 票圈精选,融合排序,60053:空置
@@ -625,42 +624,38 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=Non
     elif ab_code == 60056:
         pre_str = "k_p7:"
     #print("pre_str:", pre_str)
-    recall_list = []
-    rov_recall_rank = data['rov_pool_recall']
+    rov_recall_rank = rec_recall_item_list
     #call rank service
     #flag_call_service = 0
     if ab_code == 60066:
-        feature_dict, recall_list = get_featurs(mid, data, size, top_K, flow_pool_P, env_dict)
+        feature_dict = get_featurs(mid, data, size, top_K, flow_pool_P, env_dict)
         score_result = get_tf_serving_sores(feature_dict)
-        if score_result and len(score_result) > 0 and len(score_result) == len(recall_list):
+        if rec_recall_item_list and score_result and len(score_result) > 0 and len(score_result) == len(rec_recall_item_list):
             for i in range(len(score_result)):
-                recall_list[i]['sort_score'] = score_result[i][0]
-                recall_list[i]['flag_call_service'] = 1
-            rov_recall_rank = sorted(recall_list, key=lambda k: k.get('sort_score', 0), reverse=True)
+                rec_recall_item_list[i]['sort_score'] = score_result[i][0]
+                rec_recall_item_list[i]['flag_call_service'] = 1
+            rov_recall_rank = sorted(rec_recall_item_list, key=lambda k: k.get('sort_score', 0), reverse=True)
         else:
-            rov_recall_rank = sup_rank(data, pre_str, recall_list, rov_recall_rank)
+            rov_recall_rank = sup_rank(pre_str, rec_recall_item_list)
     else:
         redisObj = RedisHelper()
         vidKeys = []
-        for recall_item in data['rov_pool_recall']:
-            if len(recall_item) <= 0:
-                continue
+        for recall_item in rec_recall_item_list:
             vid = recall_item.get("videoId", 0)
             vidKeys.append(pre_str + str(vid))
-            recall_list.append(recall_item)
         video_scores = redisObj.get_batch_key(vidKeys)
-        if video_scores and len(recall_list) > 0:
+        if video_scores and len(rec_recall_item_list) > 0 and len(video_scores)>0:
             for i in range(len(video_scores)):
                 try:
                     if video_scores[i] is None:
-                        recall_list[i]['sort_score'] = 0.0
+                        rec_recall_item_list[i]['sort_score'] = 0.0
                     else:
                         video_score_str = json.loads(video_scores[i])
                         # print("video_score_str:", video_score_str)
-                        recall_list[i]['sort_score'] = video_score_str[0]
+                        rec_recall_item_list[i]['sort_score'] = video_score_str[0]
                 except Exception:
-                    recall_list[i]['sort_score'] = 0.0
-            rov_recall_rank = sorted(recall_list, key=lambda k: k.get('sort_score', 0), reverse=True)
+                    rec_recall_item_list[i]['sort_score'] = 0.0
+            rov_recall_rank = sorted(rec_recall_item_list, key=lambda k: k.get('sort_score', 0), reverse=True)
     #print(rov_recall_rank)
     flow_recall_rank = sorted(data['flow_pool_recall'], key=lambda k: k.get('rovScore', 0), reverse=True)
     rov_recall_rank, flow_recall_rank = remove_duplicate(rov_recall=rov_recall_rank, flow_recall=flow_recall_rank,
@@ -734,18 +729,13 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=Non
 
 
 # 排序服务兜底
-def sup_rank(data, pre_str, recall_list, rov_recall_rank):
+def sup_rank(pre_str, recall_list):
     redisObj = RedisHelper()
     vidKeys = []
-    for recall_item in data['rov_pool_recall']:
-        if len(recall_item) <= 0:
-            continue
+    for recall_item in recall_list:
         vid = recall_item.get("videoId", 0)
         vidKeys.append(pre_str + str(vid))
-        recall_list.append(recall_item)
     video_scores = redisObj.get_batch_key(vidKeys)
-    #print("vidKeys:", video_scores, "\t", vidKeys)
-    #print(len(video_scores), len(recall_list))
     if video_scores and len(recall_list) > 0:
         for i in range(len(video_scores)):
             try: