Bläddra i källkod

add day rule2

liqian 3 år sedan
förälder
incheckning
357336765f
2 ändrade filer med 24 tillägg och 20 borttagningar
  1. 1 4
      config.py
  2. 23 16
      rule_rank_day.py

+ 1 - 4
config.py

@@ -101,10 +101,7 @@ class BaseConfig(object):
     # 小程序天级规则参数
     RULE_PARAMS_DAY = {
         'rule1': {'return_count': 200},
-        # 'rule2': {'return_count': 20, 'score_rule': 0.001},
-        # 'rule3': {'view_type': 'pre-view', 'return_count': 20, 'score_rule': 0.005},
-        # 'rule4': {'cal_score_func': 2, 'return_count': 20, 'score_rule': 0},
-        # 'rule5': {'cal_score_func': 3, 'return_count': 20, 'score_rule': 0},
+        'rule2': {'cal_score_func': 2},
     }
 
     # 小程序离线ROV模型结果存放 redis key前缀,完整格式:com.weiqu.video.recall.hot.item.score.{date}

+ 23 - 16
rule_rank_day.py

@@ -84,7 +84,13 @@ def cal_score1(df):
 
 
 def cal_score2(df):
-    pass
+    # score2 formula: score = share次数 / (view人数 + 1000) + 0.01 * 回流人数 / (share次数 + 100)
+    df = df.fillna(0)
+    df['share_rate'] = df['share次数'] / (df['view人数'] + 1000)
+    df['back_rate'] = df['回流人数'] / (df['share次数'] + 100)
+    df['score'] = df['share_rate'] + 0.01 * df['back_rate']
+    df = df.sort_values(by=['score'], ascending=False)
+    return df
 
 
 def video_rank_day(df, now_date, rule_key, param):
@@ -104,25 +110,28 @@ def video_rank_day(df, now_date, rule_key, param):
 
     # 获取符合进入召回源条件的视频
     return_count = param.get('return_count')
-    h_recall_df = df[df['回流人数'] > return_count]
-    h_recall_videos = h_recall_df['videoid'].to_list()
-    log_.info(f'h_recall videos count = {len(h_recall_videos)}')
+    if return_count:
+        day_recall_df = df[df['回流人数'] > return_count]
+    else:
+        day_recall_df = df
+    day_recall_videos = day_recall_df['videoid'].to_list()
+    log_.info(f'day_recall videos count = {len(day_recall_videos)}')
     # 写入对应的redis
-    h_video_ids =[]
-    h_recall_result = {}
-    for video_id in h_recall_videos:
-        score = h_recall_df[h_recall_df['videoid'] == video_id]['score']
-        h_recall_result[int(video_id)] = float(score)
-        h_video_ids.append(int(video_id))
-    h_recall_key_name = \
+    day_video_ids =[]
+    day_recall_result = {}
+    for video_id in day_recall_videos:
+        score = day_recall_df[day_recall_df['videoid'] == video_id]['score']
+        day_recall_result[int(video_id)] = float(score)
+        day_video_ids.append(int(video_id))
+    day_recall_key_name = \
         f"{config_.RECALL_KEY_NAME_PREFIX_BY_DAY}{rule_key}.{datetime.strftime(now_date, '%Y%m%d')}"
-    if len(h_recall_result) > 0:
-        redis_helper.add_data_with_zset(key_name=h_recall_key_name, data=h_recall_result, expire_time=7 * 24 * 3600)
+    if len(day_recall_result) > 0:
+        redis_helper.add_data_with_zset(key_name=day_recall_key_name, data=day_recall_result, expire_time=7 * 24 * 3600)
 
     # 去重更新rov模型结果,并另存为redis中
     initial_data_dup = {}
     for video_id, score in initial_data:
-        if int(video_id) not in h_video_ids:
+        if int(video_id) not in day_video_ids:
             initial_data_dup[int(video_id)] = score
     log_.info(f"initial data dup count = {len(initial_data_dup)}")
 
@@ -177,10 +186,8 @@ def day_timer_check():
     project = config_.PROJECT_DAY
     table = config_.TABLE_DAY
     rule_params = config_.RULE_PARAMS_DAY
-    # return_count_list = [20, 10]
     now_date = datetime.today()
     log_.info(f"now_date: {datetime.strftime(now_date, '%Y%m%d')}")
-    now_h = datetime.now().hour
     now_min = datetime.now().minute
     # 查看当前天级更新的数据是否已准备好
     h_data_count = day_data_check(project=project, table=table, now_date=now_date)