2 years ago · 744973dab2
--- a/rank_service.py
+++ b/rank_service.py
@@ -9,8 +9,6 @@ log_ = Log()
 
				 
			
 
				 config_ = set_config()
			
 
				 def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
			
 
				-    recall_list = []
			
 
				-    vidKeys = []
			
 
				     feature_dict = {}
			
 
				     # defult value
			
 
				     apptype = 4
			
@@ -21,6 +19,8 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
 
				     machineinfo_platform = 'android'
			
 
				     sencetype = 100078
			
 
				     machineinfo_model ='M2006C3LC'
			
 
				+    city_code = -1
			
 
				+    province_code = -1
			
 
				     if env_dict and len(env_dict)>0:
			
 
				         apptype = env_dict.get('app_type',4)
			
 
				         pagesource = env_dict.get('pagesource', '')
			
@@ -30,10 +30,8 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
 
				         recommendsource = env_dict.get('recommendsource', '0')
			
 
				         machineinfo_platform = env_dict.get('machineinfo_platform', '')
			
 
				         sencetype = env_dict.get('sencetype', '')
			
 
				-    redisObj = RedisHelper()
			
 
				-    pre_str = "v_ctr:"
			
 
				-    hour_pre_str = "v_hour_ctr:"
			
 
				-    hour_vidKeys = []
			
 
				+        city_code = env_dict.get('city_code', -1)
			
 
				+        province_code = env_dict.get('province_code', -1)
			
 
				     mid_list = []
			
 
				     videoid_list = []
			
 
				     apptype_list = []
			
@@ -41,7 +39,6 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
 
				     versioncode_list = []
			
 
				     machineinfo_brand_list = []
			
 
				     machineinfo_model_list = []
			
 
				-    recommendsource_list = []
			
 
				     machineinfo_platform_list = []
			
 
				     sencetype_list = []
			
 
				     day_rov_list = []
			
@@ -56,33 +53,31 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
 
				     day_view_pv_list = []
			
 
				     day_view_users_list = []
			
 
				     day_share_users = []
			
 
				-    for recall_item in data['rov_pool_recall']:
			
 
				-        if len(recall_item)<=0:
			
 
				-            continue
			
 
				-        vid = recall_item.get("videoId",0)
			
 
				-        mid_list.append(mid)
			
 
				-        videoid_list.append(int(vid))
			
 
				-        apptype_list.append(apptype)
			
 
				-        pagesource_list.append(pagesource)
			
 
				-        versioncode_list.append(versioncode)
			
 
				-        machineinfo_brand_list.append(machineinfo_brand)
			
 
				-        machineinfo_model_list.append(machineinfo_model)
			
 
				-        recommendsource_list.append(recommendsource)
			
 
				-        machineinfo_platform_list.append(machineinfo_platform)
			
 
				-        sencetype_list.append(sencetype)
			
 
				-        vidKeys.append(pre_str + str(vid))
			
 
				-        hour_vidKeys.append(hour_pre_str+str(vid))
			
 
				-        recall_list.append(recall_item)
			
 
				-    #print("vidKeys:", vidKeys)
			
 
				-    video_static_info = redisObj.get_batch_key(vidKeys)
			
 
				-    video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
			
 
				+    recommendsource_list = []
			
 
				+    recall_list = env_dict.get('recall_list', [])
			
 
				+    city_list = []
			
 
				+    province_list = []
			
 
				+    if recall_list and len(recall_list)>0:
			
 
				+        for i in range(len(recall_list)):
			
 
				+            mid_list.append(mid)
			
 
				+            videoid_list.append(int(recall_list[i]))
			
 
				+            apptype_list.append(apptype)
			
 
				+            pagesource_list.append(pagesource)
			
 
				+            versioncode_list.append(versioncode)
			
 
				+            machineinfo_brand_list.append(machineinfo_brand)
			
 
				+            machineinfo_model_list.append(machineinfo_model)
			
 
				+            recommendsource_list.append(recommendsource)
			
 
				+            machineinfo_platform_list.append(machineinfo_platform)
			
 
				+            sencetype_list.append(sencetype)
			
 
				+            city_list.append(int(city_code))
			
 
				+            province_list.append(int(province_code))
			
 
				+    video_static_info = env_dict.get('vid_day_fea_list', [])
			
 
				+    video_hour_static_info = env_dict.get('vid_hour_fea_list', [])
			
 
				     #print("video_static_info:",video_static_info)
			
 
				-    if video_static_info:
			
 
				+    if video_static_info and len(video_static_info)>0:
			
 
				         for i in range(len(video_static_info)):
			
 
				             try:
			
 
				-                # print(video_scores[i])
			
 
				-                vid = vidKeys[i].replace(pre_str,"")
			
 
				-                if video_static_info[i] :
			
 
				+                if video_static_info[i]  and len(video_static_info[i])>0:
			
 
				                     per_video_staic = json.loads(video_static_info[i])
			
 
				                     day_rov_list.append(float(per_video_staic[0]))
			
 
				                     day_share_return_score_list.append(float(per_video_staic[1]))
			
@@ -118,12 +113,10 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
 
				     hour_return_rate_list = []
			
 
				     hour_ctr_score_list = []
			
 
				 
			
 
				-    if video_hour_static_info:
			
 
				+    if video_hour_static_info and len(video_hour_static_info)>0:
			
 
				         for i in range(len(video_hour_static_info)):
			
 
				             try:
			
 
				-                # print(video_scores[i])
			
 
				-                vid = hour_vidKeys[i].replace(hour_pre_str,"")
			
 
				-                if video_hour_static_info[i] :
			
 
				+                if video_hour_static_info[i] and len(video_hour_static_info[i])>0:
			
 
				                     per_hour_video_staic = json.loads(video_hour_static_info[i])
			
 
				                     hour_rov_list.append(float(per_hour_video_staic[0]))
			
 
				                     hour_share_return_score_list.append(float(per_hour_video_staic[1]))
			
@@ -177,7 +170,9 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
 
				     feature_dict["hour_share_score_list"] = hour_share_score_list
			
 
				     feature_dict["hour_return_rate_list"] = hour_return_rate_list
			
 
				     feature_dict["hour_ctr_score_list"] = hour_ctr_score_list
			
 
				-    return feature_dict, recall_list
			
 
				+    feature_dict["city_code"] = city_list
			
 
				+    feature_dict["province_code"] = province_list
			
 
				+    return feature_dict
			
 
				 
			
 
				 
			
 
				 def insert_static_default_fea(day_ctr_score_list, day_play_pv_list, day_play_users_list, day_return_rate_list,
			
@@ -224,9 +219,11 @@ def get_tf_serving_sores(feature_dict):
 
				                     # "day_view_users":feature_dict["day_view_users_list"],
			
 
				                     "hour_rov": feature_dict["hour_rov_list"],
			
 
				                     "hour_share_score": feature_dict["hour_share_score_list"],
			
 
				-                    "hour_share_return_score": feature_dict["hour_share_return_score_list"],
			
 
				-                    "hour_return_rate": feature_dict["hour_return_rate_list"],
			
 
				-                    "hour_ctr_score": feature_dict["hour_ctr_score_list"]
			
 
				+                    #"hour_share_return_score": feature_dict["hour_share_return_score_list"],
			
 
				+                    #"hour_return_rate": feature_dict["hour_return_rate_list"],
			
 
				+                    #"hour_ctr_score": feature_dict["hour_ctr_score_list"],
			
 
				+                    "city_code": feature_dict['city_code'],
			
 
				+                    "province_code": feature_dict['province_code']
			
 
				     }
			
 
				     request_data_dict= {}
			
 
				     request_data_dict["inputs"] = inputs_data
			
--- a/recommend.py
+++ b/recommend.py
@@ -665,40 +665,42 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
 
				                 'flow_pool_recall': recall_result_list[2]
			
 
				             }
			
 
				     # 3. 特征回流
			
 
				-    #print(env_dict)
			
 
				+    rec_recall_list = []
			
 
				+    vidKeys = []
			
 
				+    hour_vidKeys = []
			
 
				+    pre_str = "v_ctr:"
			
 
				+    pre_hour_str = "v_hour_ctr:"
			
 
				+    rec_recall_item_list = []
			
 
				+    for recall_item in data['rov_pool_recall']:
			
 
				+        if len(recall_item) <= 0:
			
 
				+            continue
			
 
				+        vid = recall_item.get("videoId", 0)
			
 
				+        rec_recall_list.append(vid)
			
 
				+        vidKeys.append(pre_str + str(vid))
			
 
				+        hour_vidKeys.append(pre_hour_str + str(vid))
			
 
				+        rec_recall_item_list.append(recall_item)
			
 
				+    redisObj = RedisHelper()
			
 
				+    video_static_info = redisObj.get_batch_key(vidKeys)
			
 
				+    video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
			
 
				+    vid_day_fea_list = []
			
 
				+    vid_hour_fea_list = []
			
 
				+    if video_static_info:
			
 
				+        vid_day_fea_list = video_static_info
			
 
				+    if video_hour_static_info:
			
 
				+        vid_hour_fea_list = video_hour_static_info
			
 
				     if env_dict:
			
 
				-        province_code = client_info.get('provinceCode', '-1')
			
 
				-        city_code = client_info.get('cityCode', '-1')
			
 
				+        province_code = client_info.get('provinceCode', -1)
			
 
				+        city_code = client_info.get('cityCode', -1)
			
 
				         env_dict['mid'] = mid
			
 
				         env_dict['province_code'] = province_code
			
 
				         env_dict['city_code'] = city_code
			
 
				-        rec_recall_list = []
			
 
				-        vidKeys = []
			
 
				-        hour_vidKeys = []
			
 
				-        pre_str = "v_ctr:"
			
 
				-        pre_hour_str = "v_hour_ctr:"
			
 
				-        for recall_item in data['rov_pool_recall']:
			
 
				-            if len(recall_item) <= 0:
			
 
				-                continue
			
 
				-            vid = recall_item.get("videoId", 0)
			
 
				-            rec_recall_list.append(vid)
			
 
				-            vidKeys.append(pre_str + str(vid))
			
 
				-            hour_vidKeys.append(pre_hour_str + str(vid))
			
 
				-        redisObj = RedisHelper()
			
 
				-        video_static_info = redisObj.get_batch_key(vidKeys)
			
 
				-        video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
			
 
				-        vid_day_fea_list = []
			
 
				-        vid_hour_fea_list = []
			
 
				-        if video_static_info:
			
 
				-            vid_day_fea_list = video_static_info
			
 
				-        if video_hour_static_info:
			
 
				-            vid_hour_fea_list = video_hour_static_info
			
 
				+
			
 
				         env_dict['recall_list'] = rec_recall_list
			
 
				         env_dict['vid_day_fea_list'] = vid_day_fea_list
			
 
				         env_dict['vid_hour_fea_list'] = vid_hour_fea_list
			
 
				         env_json = env_dict
			
 
				     #4.
			
 
				-    rank_result, flow_num  = video_new_rank2(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P), ab_code=ab_code, mid=mid, exp_config=exp_config, env_dict=env_dict)
			
 
				+    rank_result, flow_num  = video_new_rank2(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P), ab_code=ab_code, mid=mid, exp_config=exp_config, env_dict=env_dict, rec_recall_item_list=rec_recall_item_list)
			
 
				     #print(rank_result)
			
 
				     if rank_result:
			
 
				         result['rank_num'] = len(rank_result)
			
--- a/video_rank.py
+++ b/video_rank.py
@@ -593,7 +593,7 @@ def video_rank_with_old_video(rank_result, old_video_recall, size, top_K, old_vi
 
				     return new_rank_result[:size]
			
 
				 
			
 
				 
			
 
				-def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=None, env_dict=None):
			
 
				+def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=None, env_dict=None, rec_recall_item_list=None):
			
 
				     """
			
 
				         视频分发排序
			
 
				         :param data: 各路召回的视频 type-dict {'rov_pool_recall': [], 'flow_pool_recall': []}
			
@@ -602,10 +602,9 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=Non
 
				         :param flow_pool_P: size-top_K视频为流量池视频的概率 type-float
			
 
				         :return: rank_result
			
 
				         """
			
 
				-    if not data['rov_pool_recall'] and not data['flow_pool_recall']:
			
 
				+    if not rec_recall_item_list and not data['flow_pool_recall']:
			
 
				         return [], 0
			
 
				 
			
 
				-
			
 
				     #全量的是vlog,票圈精选， 334，60057,
			
 
				     # 60054: simrecall,
			
 
				     # 60052: 票圈精选，融合排序，60053:空置
			
@@ -625,42 +624,38 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=Non
 
				     elif ab_code == 60056:
			
 
				         pre_str = "k_p7:"
			
 
				     #print("pre_str:", pre_str)
			
 
				-    recall_list = []
			
 
				-    rov_recall_rank = data['rov_pool_recall']
			
 
				+    rov_recall_rank = rec_recall_item_list
			
 
				     #call rank service
			
 
				     #flag_call_service = 0
			
 
				     if ab_code == 60066:
			
 
				-        feature_dict, recall_list = get_featurs(mid, data, size, top_K, flow_pool_P, env_dict)
			
 
				+        feature_dict = get_featurs(mid, data, size, top_K, flow_pool_P, env_dict)
			
 
				         score_result = get_tf_serving_sores(feature_dict)
			
 
				-        if score_result and len(score_result) > 0 and len(score_result) == len(recall_list):
			
 
				+        if rec_recall_item_list and score_result and len(score_result) > 0 and len(score_result) == len(rec_recall_item_list):
			
 
				             for i in range(len(score_result)):
			
 
				-                recall_list[i]['sort_score'] = score_result[i][0]
			
 
				-                recall_list[i]['flag_call_service'] = 1
			
 
				-            rov_recall_rank = sorted(recall_list, key=lambda k: k.get('sort_score', 0), reverse=True)
			
 
				+                rec_recall_item_list[i]['sort_score'] = score_result[i][0]
			
 
				+                rec_recall_item_list[i]['flag_call_service'] = 1
			
 
				+            rov_recall_rank = sorted(rec_recall_item_list, key=lambda k: k.get('sort_score', 0), reverse=True)
			
 
				         else:
			
 
				-            rov_recall_rank = sup_rank(data, pre_str, recall_list, rov_recall_rank)
			
 
				+            rov_recall_rank = sup_rank(pre_str, rec_recall_item_list)
			
 
				     else:
			
 
				         redisObj = RedisHelper()
			
 
				         vidKeys = []
			
 
				-        for recall_item in data['rov_pool_recall']:
			
 
				-            if len(recall_item) <= 0:
			
 
				-                continue
			
 
				+        for recall_item in rec_recall_item_list:
			
 
				             vid = recall_item.get("videoId", 0)
			
 
				             vidKeys.append(pre_str + str(vid))
			
 
				-            recall_list.append(recall_item)
			
 
				         video_scores = redisObj.get_batch_key(vidKeys)
			
 
				-        if video_scores and len(recall_list) > 0:
			
 
				+        if video_scores and len(rec_recall_item_list) > 0 and len(video_scores)>0:
			
 
				             for i in range(len(video_scores)):
			
 
				                 try:
			
 
				                     if video_scores[i] is None:
			
 
				-                        recall_list[i]['sort_score'] = 0.0
			
 
				+                        rec_recall_item_list[i]['sort_score'] = 0.0
			
 
				                     else:
			
 
				                         video_score_str = json.loads(video_scores[i])
			
 
				                         # print("video_score_str:", video_score_str)
			
 
				-                        recall_list[i]['sort_score'] = video_score_str[0]
			
 
				+                        rec_recall_item_list[i]['sort_score'] = video_score_str[0]
			
 
				                 except Exception:
			
 
				-                    recall_list[i]['sort_score'] = 0.0
			
 
				-            rov_recall_rank = sorted(recall_list, key=lambda k: k.get('sort_score', 0), reverse=True)
			
 
				+                    rec_recall_item_list[i]['sort_score'] = 0.0
			
 
				+            rov_recall_rank = sorted(rec_recall_item_list, key=lambda k: k.get('sort_score', 0), reverse=True)
			
 
				     #print(rov_recall_rank)
			
 
				     flow_recall_rank = sorted(data['flow_pool_recall'], key=lambda k: k.get('rovScore', 0), reverse=True)
			
 
				     rov_recall_rank, flow_recall_rank = remove_duplicate(rov_recall=rov_recall_rank, flow_recall=flow_recall_rank,
			
@@ -734,18 +729,13 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=Non
 
				 
			
 
				 
			
 
				 # 排序服务兜底
			
 
				-def sup_rank(data, pre_str, recall_list, rov_recall_rank):
			
 
				+def sup_rank(pre_str, recall_list):
			
 
				     redisObj = RedisHelper()
			
 
				     vidKeys = []
			
 
				-    for recall_item in data['rov_pool_recall']:
			
 
				-        if len(recall_item) <= 0:
			
 
				-            continue
			
 
				+    for recall_item in recall_list:
			
 
				         vid = recall_item.get("videoId", 0)
			
 
				         vidKeys.append(pre_str + str(vid))
			
 
				-        recall_list.append(recall_item)
			
 
				     video_scores = redisObj.get_batch_key(vidKeys)
			
 
				-    #print("vidKeys:", video_scores, "\t", vidKeys)
			
 
				-    #print(len(video_scores), len(recall_list))
			
 
				     if video_scores and len(recall_list) > 0:
			
 
				         for i in range(len(video_scores)):
			
 
				             try: