|  | @@ -1,8 +1,6 @@
 | 
	
		
			
				|  |  |  #coding utf-8
 | 
	
		
			
				|  |  | -from tqdm import tqdm
 | 
	
		
			
				|  |  |  import sys
 | 
	
		
			
				|  |  | -import json
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | +import datetime
 | 
	
		
			
				|  |  |  import traceback
 | 
	
		
			
				|  |  |  from threading import Timer
 | 
	
		
			
				|  |  |  from tqdm import tqdm
 | 
	
	
		
			
				|  | @@ -17,10 +15,8 @@ from feature import get_item_features
 | 
	
		
			
				|  |  |  from lr_model import LrModel
 | 
	
		
			
				|  |  |  from utils import exe_sql
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -if __name__ == "__main__":
 | 
	
		
			
				|  |  | +def update_offline_score_item(dt):
 | 
	
		
			
				|  |  |      project = 'loghubods'
 | 
	
		
			
				|  |  | -    datetime = sys.argv[1]
 | 
	
		
			
				|  |  |      sql = """
 | 
	
		
			
				|  |  |  --odps sql 
 | 
	
		
			
				|  |  |  --********************************************************************--
 | 
	
	
		
			
				|  | @@ -127,7 +123,7 @@ mid AS u_id
 | 
	
		
			
				|  |  |  ,round(if(i_ros_3month > 10.0, 10.0, i_ros_3month) / 10.0, 6) as i_ros_3month
 | 
	
		
			
				|  |  |  from
 | 
	
		
			
				|  |  |  loghubods.user_video_features_data_final
 | 
	
		
			
				|  |  | -where dt='{datetime}'
 | 
	
		
			
				|  |  | +where dt='{dt}'
 | 
	
		
			
				|  |  |  and ad_ornot = '0'
 | 
	
		
			
				|  |  |  and apptype != '13'
 | 
	
		
			
				|  |  |  ), candidate_user as (
 | 
	
	
		
			
				|  | @@ -223,8 +219,8 @@ and apptype != '13'
 | 
	
		
			
				|  |  |  SELECT
 | 
	
		
			
				|  |  |  *
 | 
	
		
			
				|  |  |  from candidate_item
 | 
	
		
			
				|  |  | -    """.format(datetime=datetime)
 | 
	
		
			
				|  |  | -    # print(sql)
 | 
	
		
			
				|  |  | +    """.format(dt=dt)
 | 
	
		
			
				|  |  | +    # log_.info(sql)
 | 
	
		
			
				|  |  |      data = exe_sql(project, sql)
 | 
	
		
			
				|  |  |      print('sql done')
 | 
	
		
			
				|  |  |      # data.to_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t')
 | 
	
	
		
			
				|  | @@ -232,8 +228,6 @@ from candidate_item
 | 
	
		
			
				|  |  |      model_key = 'ad_out_v1'
 | 
	
		
			
				|  |  |      lr_model = LrModel('model/{}.json'.format(model_key))
 | 
	
		
			
				|  |  |      item_h_dict = {}
 | 
	
		
			
				|  |  | -    k_col = 'i_id'
 | 
	
		
			
				|  |  | -    dt = datetime
 | 
	
		
			
				|  |  |      key_name_prefix = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM}{model_key}"
 | 
	
		
			
				|  |  |      print(key_name_prefix)
 | 
	
		
			
				|  |  |      mean_item_h = 0.0
 | 
	
	
		
			
				|  | @@ -251,8 +245,8 @@ from candidate_item
 | 
	
		
			
				|  |  |              count_item_h += 1
 | 
	
		
			
				|  |  |              # print(item_features)
 | 
	
		
			
				|  |  |              # print(item_h)
 | 
	
		
			
				|  |  | -    mean_item_h = mean_item_h / count_item_h 
 | 
	
		
			
				|  |  | -    item_h_dict['mean'] = mean_item_h 
 | 
	
		
			
				|  |  | +    mean_item_h = mean_item_h / count_item_h
 | 
	
		
			
				|  |  | +    item_h_dict['mean'] = mean_item_h
 | 
	
		
			
				|  |  |      print(mean_item_h)
 | 
	
		
			
				|  |  |      print(count_item_h)
 | 
	
		
			
				|  |  |      k = 'mean'
 | 
	
	
		
			
				|  | @@ -260,3 +254,51 @@ from candidate_item
 | 
	
		
			
				|  |  |      # with open('{}.json'.format(key_name_prefix), 'w') as fout:
 | 
	
		
			
				|  |  |      #     json.dump(item_h_dict, fout, indent=2, ensure_ascii=False, sort_keys=True)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def timer_check(dt):
 | 
	
		
			
				|  |  | +    try:
 | 
	
		
			
				|  |  | +        project = config_.ad_model_data['ad_out_v1'].get('project')
 | 
	
		
			
				|  |  | +        table = config_.ad_model_data['ad_out_v1'].get('table')
 | 
	
		
			
				|  |  | +        now_date = datetime.datetime.today()
 | 
	
		
			
				|  |  | +        yesterday_date = now_date - datetime.timedelta(days=1)
 | 
	
		
			
				|  |  | +        now_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
 | 
	
		
			
				|  |  | +        yesterday_dt = datetime.datetime.strftime(yesterday_date, '%Y%m%d')
 | 
	
		
			
				|  |  | +        log_.info(f"now_dt: {now_dt}")
 | 
	
		
			
				|  |  | +        if dt is not None:
 | 
	
		
			
				|  |  | +            yesterday_dt = dt
 | 
	
		
			
				|  |  | +        log_.info(f"update_dt: {yesterday_dt}")
 | 
	
		
			
				|  |  | +        now_min = datetime.datetime.now().minute
 | 
	
		
			
				|  |  | +        # 查看当前更新的数据是否已准备好
 | 
	
		
			
				|  |  | +        data_count = data_check(project=project, table=table, dt=yesterday_dt)
 | 
	
		
			
				|  |  | +        if data_count > 0:
 | 
	
		
			
				|  |  | +            log_.info('update_offline_score_item start! {}'.format(data_count))
 | 
	
		
			
				|  |  | +            # 数据准备好,进行更新
 | 
	
		
			
				|  |  | +            update_offline_score_item(dt=yesterday_dt)
 | 
	
		
			
				|  |  | +            log_.info('update_offline_score_item end!')
 | 
	
		
			
				|  |  | +        else:
 | 
	
		
			
				|  |  | +            # 数据没准备好,5分钟后重新检查
 | 
	
		
			
				|  |  | +            wait_seconds = 5 * 60
 | 
	
		
			
				|  |  | +            log_.info('data not ready, wait {}s'.format(wait_seconds))
 | 
	
		
			
				|  |  | +            Timer(wait_seconds, timer_check).start()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    except Exception as e:
 | 
	
		
			
				|  |  | +        log_.error(f"用户广告跳出率预估离线item数据更新失败 exception: {e}, traceback: {traceback.format_exc()}")
 | 
	
		
			
				|  |  | +        send_msg_to_feishu(
 | 
	
		
			
				|  |  | +            webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
 | 
	
		
			
				|  |  | +            key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
 | 
	
		
			
				|  |  | +            msg_text=f"rov-offline{config_.ENV_TEXT} - 用户广告跳出率预估离线item数据更新失败\n"
 | 
	
		
			
				|  |  | +                     f"exception: {e}\n"
 | 
	
		
			
				|  |  | +                     f"traceback: {traceback.format_exc()}"
 | 
	
		
			
				|  |  | +        )
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +if __name__ == "__main__":
 | 
	
		
			
				|  |  | +    dt = None
 | 
	
		
			
				|  |  | +    if len(sys.argv) > 1:
 | 
	
		
			
				|  |  | +        dt = sys.argv[1]
 | 
	
		
			
				|  |  | +        log_.info('## 手动更新:{}'.format(dt))
 | 
	
		
			
				|  |  | +    else:
 | 
	
		
			
				|  |  | +        log_.info('## 自动更新')
 | 
	
		
			
				|  |  | +    timer_check(dt)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 |