Przeglądaj źródła

add recall-update-h rule3

liqian 3 lata temu
rodzic
commit
af06d38705
3 zmienione pliki z 21 dodaniami i 16 usunięciami
  1. 7 0
      config.py
  2. 13 11
      rule_rank_h.py
  3. 1 5
      videos_filter.py

+ 7 - 0
config.py

@@ -70,6 +70,13 @@ class BaseConfig(object):
     APP_OP_PROJECT = 'loghubods'
     APP_OP_TABLE = 'category_video_list_test1'
 
+    # 小程序小时级规则参数
+    RULE_PARAMS = {
+        'rule1': {'return_count': 20, 'score_rule': 0.005},
+        'rule2': {'return_count': 20, 'score_rule': 0.001},
+        'rule3': {'view_type': 'pre-view', 'return_count': 20, 'score_rule': 0.005}
+    }
+
     # app_type: [18, 19]预测表名
     PREDICT_PROJECT_18_19 = {
         '18': 'loghubods',  # 老好看

+ 13 - 11
rule_rank_h.py

@@ -15,6 +15,7 @@ project = 'loghubods'
 table = 'video_each_hour_update'
 features = [
     'videoid',
+    'lastonehour_preview',  # 过去1小时预曝光
     'lastonehour_view',  # 过去1小时曝光
     'lastonehour_play',  # 过去1小时播放
     'lastonehour_share',  # 过去1小时分享
@@ -71,10 +72,11 @@ def get_feature_data(now_date):
     return feature_df
 
 
-def cal_score(df):
+def cal_score(df, param):
     """
     计算score
     :param df: 特征数据
+    :param param: 规则参数
     :return:
     """
     # score计算公式: sharerate*backrate*logback*ctr
@@ -87,7 +89,10 @@ def cal_score(df):
     df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
     df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
     df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)
-    df['ctr'] = df['lastonehour_play'] / (df['lastonehour_view'] + 1000)
+    if param.get('view_type', None) == 'pre-view':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
+    else:
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_view'] + 1000)
     df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
     df['score'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
     df = df.sort_values(by=['score'], ascending=False)
@@ -162,15 +167,15 @@ def video_rank(df, now_date, now_h, rule_key, param):
 def rank_by_h(now_date, now_h, rule_params):
     # 获取特征数据
     feature_df = get_feature_data(now_date=now_date)
-    # 计算score
-    score_df = cal_score(df=feature_df)
     # rank
     for key, value in rule_params.items():
         log_.info(f"rule = {key}, param = {value}")
+        # 计算score
+        score_df = cal_score(df=feature_df, param=value)
         video_rank(df=score_df, now_date=now_date, now_h=now_h, rule_key=key, param=value)
-    # to-csv
-    score_filename = f"score_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.csv"
-    score_df.to_csv(f'./data/{score_filename}')
+        # to-csv
+        score_filename = f"score_{key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.csv"
+        score_df.to_csv(f'./data/{score_filename}')
 
 
 def h_rank_bottom(now_date, now_h, rule_key):
@@ -201,10 +206,7 @@ def h_rank_bottom(now_date, now_h, rule_key):
 
 
 def h_timer_check():
-    rule_params = {
-        'rule1': {'return_count': 20, 'score_rule': 0.005},
-        'rule2': {'return_count': 20, 'score_rule': 0.001}
-    }
+    rule_params = config_.RULE_PARAMS
     # return_count_list = [20, 10]
     now_date = datetime.datetime.today()
     log_.info(f"now_date: {datetime.datetime.strftime(now_date, '%Y%m%d%H')}")

+ 1 - 5
videos_filter.py

@@ -389,11 +389,7 @@ def filter_app_pool():
 
 def filter_rov_h():
     """过滤小程序小时级数据"""
-    rule_params = {
-        'rule1': {'return_count': 20, 'score_rule': 0.005},
-        'rule2': {'return_count': 20, 'score_rule': 0.001}
-    }
-    return_count_list = [20, 10]
+    rule_params = config_.RULE_PARAMS
     log_.info("rov_h pool filter start ...")
     redis_helper = RedisHelper()
     # 获取当前日期