| 
					
				 | 
			
			
				@@ -18,6 +18,7 @@ from log import Log 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import os 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from argparse import ArgumentParser 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from constants import AutoReplyAccountType 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from alg_growth_common import check_unsafe_video, filter_unsafe_video 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 CONFIG, _ = set_config() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 LOGGER = Log() 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -41,9 +42,6 @@ RDS_RANK_RESULT_TABLE = 'alg_gh_autoreply_video_rank_data' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 STATS_PERIOD_DAYS = 5 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 SEND_N = 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-unsafe_videos = [13817005, 14403867] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-unsafe_video_condition = ','.join([str(x) for x in unsafe_videos]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def get_and_update_gh_ids(run_dt): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     db = MysqlHelper(CONFIG.MYSQL_GROWTH_INFO) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     gh_type = AutoReplyAccountType.EXTERNAL_GZH.value 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -98,7 +96,9 @@ def process_reply_stats(project, table, period, run_dt): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     df['video_id'] = df['video_id'].astype('int64') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     df = df[['gh_id', 'video_id', 'send_count', 'first_visit_uv', 'day0_return']] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df = df.query(f'video_id not in ({unsafe_video_condition})') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # 获取统计数据时统一去除不安全视频 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    df = filter_unsafe_video(df) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 账号内聚合 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     df = df.groupby(['video_id', 'gh_id']).agg({ 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -125,11 +125,10 @@ def rank_for_layer1(run_dt, run_hour, project, table, gh): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # TODO: 加审核&退场 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     df = get_odps_df_of_max_partition(project, table, {'dt': run_dt}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     df = df.to_pandas() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    df = filter_unsafe_video(df) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 确保重跑时可获得一致结果 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     dt_version = f'{run_dt}{run_hour}' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     np.random.seed(int(dt_version) + 1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df = df.query(f'video_id not in ({unsafe_video_condition})') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print(df) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # TODO: 修改权重计算策略 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     df['score'] = 1.0 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -180,7 +179,7 @@ def rank_for_layer2(run_dt, run_hour, project, stats_table, rank_table): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             LOGGER.warning( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 "gh_id[{}] rows[{}] not enough for layer2, fallback to base" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 .format(gh_id, len(sub_df))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            sub_df = base_strategy_df.query(f'gh_id == "{gh_id}"') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            sub_df = base_strategy_df.query(f'gh_id == "{gh_id}"').copy() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             sub_df['score'] = sub_df['sort'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         sampled_df = sub_df.sample(n=SEND_N, weights=sub_df['score']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         sampled_df['sort'] = range(1, len(sampled_df) + 1) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -237,6 +236,7 @@ def rank_for_base(run_dt, run_hour, project, stats_table, rank_table, stg_key): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def check_result_data(df): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    check_unsafe_video(df) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for gh_id in GH_IDS: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         for key in (EXPLORE1_GROUP_NAME, EXPLORE2_GROUP_NAME, BASE_GROUP_NAME): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             sub_df = df.query(f'gh_id == "{gh_id}" and strategy_key == "{key}"') 
			 |