Просмотр исходного кода

region_24h_sort_rank_add_weight 20231123 by mz

sunmingze 1 год назад
Родитель
Сommit
cf0129dec1
2 измененных файлов с 53 добавлено и 0 удалено
  1. 8 0
      config.py
  2. 45 0
      region_rule_rank_h.py

+ 8 - 0
config.py

@@ -446,6 +446,11 @@ class BaseConfig(object):
                        'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2,
                        'score_func': 'back_rate_exponential_weighting2'},
 
+            'rule29': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2,
+                       'score_func': 'back_rate_rank_weighting'},
+
+
         },
         'data_params': DATA_PARAMS,
         'params_list': [
@@ -485,6 +490,9 @@ class BaseConfig(object):
             # {'data': 'data10', 'rule': 'rule26'},  # 501
             {'data': 'data10', 'rule': 'rule27'},  # 502
             {'data': 'data10', 'rule': 'rule28'},  # 503
+            {'data': 'data10', 'rule': 'rule29'},  # 509
+
+
         ],
         'params_list_new': [
             # {'data': 'data10', 'rule': 'rule19'},  # 316 票圈视频 + 召回在线去重

+ 45 - 0
region_rule_rank_h.py

@@ -442,6 +442,49 @@ def cal_score_with_back_rate_exponential_weighting2(df, param):
     df = df.sort_values(by=['score'], ascending=False)
     return df
 
+def cal_score_with_back_rate_by_rank_weighting(df, param):
+    """
+    Score rows by a weighted sum of reciprocal dense ranks, then sort by score.
+    (added by sunmingze, 2023-11-23)
+
+    Derived columns written to the returned frame:
+        share_rate = lastonehour_share / (lastonehour_play + 1000)
+        back_rate  = lastonehour_return / (lastonehour_share + 10)
+        log_back   = ln(lastonehour_return + 1)
+        ctr        = lastonehour_play / (<show column> + 1000)
+        K2         = ctr capped at 0.6
+        platform_return_rate = platform_return / lastonehour_return
+        rank_by_sharerate / rank_by_backrate / rank_by_K2 / rank_by_logback:
+            dense descending ranks (largest value gets rank 1)
+        score = 1 / (rank_by_sharerate + 10) + 5 / (rank_by_backrate + 10)
+              + 5 / (rank_by_logback + 10)   + 1 / (rank_by_K2 + 10)
+
+    :param df: feature data; must contain lastonehour_share, lastonehour_play,
+        lastonehour_return, platform_return and the show column selected by
+        param['view_type']
+    :param param: rule parameters; only 'view_type' is read here
+        ('video-show' -> lastonehour_show,
+         'video-show-region' -> lastonehour_show_region,
+         anything else -> lastonehour_preview)
+    :return: a new DataFrame (the input is not modified) with the columns above,
+        sorted by score in descending order
+    """
+    # NOTE(review): the formula comment that previously sat here described
+    # rank^0.5 denominators and a weight of 10 on the log-return term, which
+    # the code never implemented. The weights below are the ones the code
+    # used; confirm with the author whether 10 was intended for log_back.
+
+    # fillna returns a new frame, so the caller's DataFrame is left untouched.
+    df = df.fillna(0)
+    df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
+    df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
+    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)
+
+    # The exposure column used as the ctr denominator depends on the view type.
+    show_column_by_view_type = {
+        'video-show': 'lastonehour_show',
+        'video-show-region': 'lastonehour_show_region',
+    }
+    show_column = show_column_by_view_type.get(param.get('view_type', None), 'lastonehour_preview')
+    df['ctr'] = df['lastonehour_play'] / (df[show_column] + 1000)
+    # Cap ctr at 0.6.
+    df['K2'] = df['ctr'].clip(upper=0.6)
+
+    # Not used in the score below. Evaluates to inf/NaN where
+    # lastonehour_return is 0 (pandas does not raise on division by zero).
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
+
+    # Dense descending ranks for share rate, back rate, K2 and log return count.
+    df['rank_by_sharerate'] = df['share_rate'].rank(ascending=False, method='dense')
+    # Fixed: the column is 'back_rate'; the former 'backrate' key raised KeyError.
+    df['rank_by_backrate'] = df['back_rate'].rank(ascending=False, method='dense')
+    df['rank_by_K2'] = df['K2'].rank(ascending=False, method='dense')
+    df['rank_by_logback'] = df['log_back'].rank(ascending=False, method='dense')
+
+    # Additive rank-based score. The "+ 10" offset flattens the gap between
+    # the top ranks. Fixed: the weight-5 term now uses the back-rate rank
+    # instead of counting the share-rate rank twice.
+    df['score'] = 1/(df['rank_by_sharerate'] + 10) + 5/(df['rank_by_backrate'] + 10)
+    df['score'] = df['score'] + 5/(df['rank_by_logback'] + 10) + 1/(df['rank_by_K2'] + 10)
+
+    df = df.sort_values(by=['score'], ascending=False)
+    return df
+
+
+
 
 def cal_score(df, param):
     if param.get('return_data', None) == 'share_region_return':
@@ -464,6 +507,8 @@ def cal_score(df, param):
             df = cal_score_with_back_rate_exponential_weighting1(df=df, param=param)
         elif param.get('score_func', None) == 'back_rate_exponential_weighting2':
             df = cal_score_with_back_rate_exponential_weighting2(df=df, param=param)
+        elif param.get('score_func', None) == 'back_rate_rank_weighting':
+            df = cal_score_with_back_rate_by_rank_weighting(df=df, param=param)
         else:
             df = cal_score_initial(df=df, param=param)
     return df