| 
					
				 | 
			
			
				@@ -29,24 +29,6 @@ GH_IDS = ('default',) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 pd.set_option('display.max_rows', None) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-TARGET_GH_IDS = ( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_250c51d5ce69', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_8a29eebc2012', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_ff16c412ab97', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_1014734791e0', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_570967881eae', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_a7c21403c493', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_7f062810b4e7', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_c8060587e6d1', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_1da8f62f4a0d', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_56b65b7d4520', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_eeec7c2e28a5', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_7c89d5a3e745', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_ee5b4b07ed8b', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_0d3c97cc30cc', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    'gh_c783350a9660', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 CDN_IMG_OPERATOR = "?x-oss-process=image/resize,m_fill,w_600,h_480,limit_0/format,jpg/watermark,image_eXNoL3BpYy93YXRlcm1hcmtlci9pY29uX3BsYXlfd2hpdGUucG5nP3gtb3NzLXByb2Nlc3M9aW1hZ2UvcmVzaXplLHdfMTQ0,g_center" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 ODS_PROJECT = "loghubods" 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -276,33 +258,6 @@ def postprocess_override_by_config(df, dt_version): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     return df 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-def rank_for_base_designate(run_dt, run_hour, stg_key): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    dt_version = f'{run_dt}{run_hour}' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    ranked_df = pd.DataFrame()  # 初始化一个空的 DataFrame 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    for gh_id in GH_IDS: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if gh_id in TARGET_GH_IDS: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            temp_df = pd.DataFrame({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'strategy_key': [stg_key], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'dt_version': [dt_version], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'gh_id': [gh_id], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'sort': [1], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'video_id': [13586800], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'score': [0.5] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            temp_df = pd.DataFrame({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'strategy_key': [stg_key], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'dt_version': [dt_version], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'gh_id': [gh_id], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'sort': [1], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'video_id': [20463342], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                'score': [0.5] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        ranked_df = pd.concat([ranked_df, temp_df], ignore_index=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return ranked_df 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def build_and_transfer_data(run_dt, run_hour, project, **kwargs): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     dt_version = f'{run_dt}{run_hour}' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     dry_run = kwargs.get('dry_run', False) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -312,8 +267,6 @@ def build_and_transfer_data(run_dt, run_hour, project, **kwargs): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     layer1_rank = rank_for_layer1(run_dt, run_hour, ODS_PROJECT, EXPLORE_POOL_TABLE, gh_df) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     layer2_rank = rank_for_layer2(run_dt, run_hour, ODS_PROJECT, GH_REPLY_STATS_TABLE, ODPS_RANK_RESULT_TABLE) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     base_rank = rank_for_base(run_dt, run_hour, ODS_PROJECT, GH_REPLY_STATS_TABLE, ODPS_RANK_RESULT_TABLE,BASE_GROUP_NAME) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # layer2_rank = rank_for_base_designate(run_dt, run_hour, EXPLORE2_GROUP_NAME) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # base_rank = rank_for_base_designate(run_dt, run_hour, BASE_GROUP_NAME) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     final_rank_df = pd.concat([layer1_rank, layer2_rank, base_rank]).reset_index(drop=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -349,9 +302,23 @@ def build_and_transfer_data(run_dt, run_hour, project, **kwargs): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # sync to MySQL 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     data_to_insert = [tuple(row) for row in final_df.itertuples(index=False)] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     data_columns = list(final_df.columns) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    max_time_to_delete = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     mysql = MysqlHelper(CONFIG.MYSQL_GROWTH_INFO) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     mysql.batch_insert(RDS_RANK_RESULT_TABLE, data_to_insert, data_columns) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # remove old data of same version 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    for key in final_df['strategy_key'].unique(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        sql = f""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            update {RDS_RANK_RESULT_TABLE} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            set is_delete = 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            where 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                dt_version = '{dt_version}' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                and strategy_key = '{key}' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                and create_time < '{max_time_to_delete}' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                and is_delete = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        rows = mysql.execute(sql) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def main_loop(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     argparser = ArgumentParser() 
			 |