|
@@ -41,6 +41,9 @@ RDS_RANK_RESULT_TABLE = 'alg_gh_autoreply_video_rank_data'
|
|
|
STATS_PERIOD_DAYS = 5
|
|
|
SEND_N = 1
|
|
|
|
|
|
+unsafe_videos = [13817005, 14403867]
|
|
|
+unsafe_video_condition = ','.join([str(x) for x in unsafe_videos])
|
|
|
+
|
|
|
def get_and_update_gh_ids(run_dt):
|
|
|
db = MysqlHelper(CONFIG.MYSQL_GROWTH_INFO)
|
|
|
gh_type = AutoReplyAccountType.EXTERNAL_GZH.value
|
|
@@ -95,6 +98,7 @@ def process_reply_stats(project, table, period, run_dt):
|
|
|
|
|
|
df['video_id'] = df['video_id'].astype('int64')
|
|
|
df = df[['gh_id', 'video_id', 'send_count', 'first_visit_uv', 'day0_return']]
|
|
|
+ df = df.query(f'video_id not in ({unsafe_video_condition})')
|
|
|
|
|
|
# Aggregate within each account
|
|
|
df = df.groupby(['video_id', 'gh_id']).agg({
|
|
@@ -124,6 +128,8 @@ def rank_for_layer1(run_dt, run_hour, project, table, gh):
|
|
|
# Ensure that reruns produce consistent results
|
|
|
dt_version = f'{run_dt}{run_hour}'
|
|
|
np.random.seed(int(dt_version) + 1)
|
|
|
+ df = df.query(f'video_id not in ({unsafe_video_condition})')
|
|
|
+ print(df)
|
|
|
|
|
|
# TODO: revise the weight calculation strategy
|
|
|
df['score'] = 1.0
|
|
@@ -272,6 +278,9 @@ def postprocess_override_by_config(df, gh_df, dt_version):
|
|
|
override_data['score'] = [0.0] * n_records
|
|
|
df_to_append = pd.DataFrame(override_data)
|
|
|
df = pd.concat([df, df_to_append], ignore_index=True)
|
|
|
+ # Force-replace the unsafe video
|
|
|
+ idx = df[df['video_id'] == 14403867].index
|
|
|
+ df.loc[idx, 'video_id'] = 20463342
|
|
|
return df
|
|
|
|
|
|
def build_and_transfer_base_mode(gh_df, run_dt, run_hour, dt_version, dry_run):
|