ソースを参照

指定账号过滤审核不通过视频

xueyiming 7 ヶ月前
コミット
e6b36417ff
2 ファイル変更、4 行追加、24 行削除
  1. alg_growth_3rd_gh_reply_video_v1.py (+4 −3)
  2. alg_growth_common.py (+0 −21)

+ 4 - 3
alg_growth_3rd_gh_reply_video_v1.py

@@ -18,7 +18,7 @@ from log import Log
 import os
 from argparse import ArgumentParser
 from constants import AutoReplyAccountType
-from alg_growth_common import check_unsafe_video, filter_unsafe_video
+from alg_growth_common import check_unsafe_video, filter_unsafe_video, filter_audit_failed_video
 
 CONFIG, _ = set_config()
 LOGGER = Log()
@@ -191,10 +191,11 @@ def rank_for_layer2(run_dt, run_hour, project, stats_table, rank_table):
 
     # 基础过滤for账号
     df = stats_df.query('send_count > 200 and score > 0')
-
+    df = filter_audit_failed_video(df)
     # fallback to base if necessary
     base_strategy_df, _ = get_last_strategy_result(
         project, rank_table, dt_version, BASE_GROUP_NAME)
+    base_strategy_df = filter_audit_failed_video(base_strategy_df)
 
     for gh_id in GH_IDS:
         if gh_id == 'default':
@@ -267,7 +268,7 @@ def rank_for_base(run_dt, run_hour, project, stats_table, rank_table, stg_key):
 
     # 合并default和分账号数据
     grouped_stats_df = pd.concat([default_stats_df, stats_with_strategy_df]).reset_index()
-
+    grouped_stats_df = filter_audit_failed_video(grouped_stats_df)
     def set_top_n(group, n=2):
         group_sorted = group.sort_values(by='score', ascending=False)
         top_n = group_sorted.head(n)

+ 0 - 21
alg_growth_common.py

@@ -34,15 +34,11 @@ def check_unsafe_video(df, force_replace=True):
             raise Exception("video unsafe")
         df.loc[unsafe_rows.index, 'video_id'] = 20463342
 
-
 def filter_unsafe_video(df):
     unsafe_video_condition = ','.join([str(x) for x in UNSAFE_VIDEO_IDS])
     df = df.query(f'video_id not in ({unsafe_video_condition})')
     return df
 
-
-
-
 def filter_audit_failed_video(df):
     video_id_list = df['video_id'].tolist()
     chunk_size = 20
@@ -66,20 +62,3 @@ def filter_audit_failed_video(df):
     return filtered_df
 
 
-if __name__ == '__main__':
-    # 定义包含 ID 的字符串
-    id_str = '20463342,12794884,13788955,13586800,4780859,33332362,19175397,4555247,14403867,12117356,14050873,14142458,17638023,14945734,13680796,13042177,10587660,14552795,12418493,12700411,13671819,13825547,12166346,13587868,19096953,14095344,13817005,1275943,13437896,12492103'
-
-    # 将字符串按逗号分割成列表
-    id_list = id_str.split(',')
-
-    # 将列表中的元素转换为整数(如果需要)
-    id_list = [int(id) for id in id_list]
-
-    # 创建 DataFrame 并将 ID 列表放入 'video_id' 列
-    df = pd.DataFrame({'video_id': id_list})
-    video_audit_failure = filter_audit_failed_video(df)
-    print(df)
-    print(video_audit_failure)
-    filtered_df = df[~df['video_id'].isin(video_audit_failure)]
-    print(filtered_df)