@@ -49,8 +49,7 @@ CDN_IMG_OPERATOR = "?x-oss-process=image/resize,m_fill,w_600,h_480,limit_0/forma
ODS_PROJECT = "loghubods"
EXPLORE_POOL_TABLE = 'alg_growth_video_return_stats_history'
GH_REPLY_STATS_TABLE = 'alg_growth_3rd_gh_reply_video_stats'
-# ODPS_RANK_RESULT_TABLE = 'alg_gh_autoreply_video_rank_data'
-ODPS_3RD_RANK_RESULT_TABLE = 'alg_3rd_gh_autoreply_video_rank_data'
+ODPS_RANK_RESULT_TABLE = 'alg_3rd_gh_autoreply_video_rank_data'
GH_DETAIL = 'gh_detail'
RDS_RANK_RESULT_TABLE = 'alg_gh_autoreply_video_rank_data'
STATS_PERIOD_DAYS = 5
@@ -276,8 +275,8 @@ def build_and_transfer_data(run_dt, run_hour, project, **kwargs):
    gh_df = get_and_update_gh_ids(run_dt)

    layer1_rank = rank_for_layer1(run_dt, run_hour, ODS_PROJECT, EXPLORE_POOL_TABLE, gh_df)
-    # layer2_rank = rank_for_layer2(run_dt, run_hour, ODS_PROJECT, GH_REPLY_STATS_TABLE, ODPS_3RD_RANK_RESULT_TABLE)
-    # base_rank = rank_for_base(run_dt, run_hour, ODS_PROJECT, GH_REPLY_STATS_TABLE, ODPS_3RD_RANK_RESULT_TABLE, BASE_GROUP_NAME)
+    # layer2_rank = rank_for_layer2(run_dt, run_hour, ODS_PROJECT, GH_REPLY_STATS_TABLE, ODPS_RANK_RESULT_TABLE)
+    # base_rank = rank_for_base(run_dt, run_hour, ODS_PROJECT, GH_REPLY_STATS_TABLE, ODPS_RANK_RESULT_TABLE, BASE_GROUP_NAME)
    layer2_rank = rank_for_base_designate(run_dt, run_hour, EXPLORE2_GROUP_NAME)
    base_rank = rank_for_base_designate(run_dt, run_hour, BASE_GROUP_NAME)
@@ -303,7 +302,7 @@ def build_and_transfer_data(run_dt, run_hour, project, **kwargs):
        return

    # save to ODPS
-    t = odps_instance.get_table(ODPS_3RD_RANK_RESULT_TABLE)
+    t = odps_instance.get_table(ODPS_RANK_RESULT_TABLE)
    part_spec_dict = {'dt': run_dt, 'hour': run_hour, 'ctime': dt_version}
    part_spec = ','.join(['{}={}'.format(k, part_spec_dict[k]) for k in part_spec_dict.keys()])
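    # Illustrative example (values assumed, not from this change): with
    # run_dt='20240520', run_hour='09' and a dt_version like '20240520090000',
    # part_spec evaluates to 'dt=20240520,hour=09,ctime=20240520090000'.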
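    # In the call below, create_partition=True makes the writer create the
    # partition if it does not exist, and overwrite=True replaces any rows
    # previously written to it.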
    with t.open_writer(partition=part_spec, create_partition=True, overwrite=True) as writer:
@@ -319,23 +318,29 @@ def build_and_transfer_data(run_dt, run_hour, project, **kwargs):
def main_loop():
    argparser = ArgumentParser()
    argparser.add_argument('-n', '--dry-run', action='store_true')
+    argparser.add_argument('--run-at', help='assume the run occurs at this date and hour, format yyyyMMddHH')
    args = argparser.parse_args()

+    run_date = datetime.today()
+    if args.run_at:
+        run_date = datetime.strptime(args.run_at, "%Y%m%d%H")
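+        # e.g. '--run-at 2024052009' parses to 2024-05-20 09:00 (illustrative value)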
+        LOGGER.info(f"Assuming run at {run_date.strftime('%Y-%m-%d %H:00')}")
+
    try:
        now_date = datetime.today()
        LOGGER.info(f"Execution started: {datetime.strftime(now_date, '%Y-%m-%d %H:%M')}")
-        now_hour = now_date.strftime("%H")

-        last_date = now_date - timedelta(1)
+        last_date = run_date - timedelta(1)
        last_dt = last_date.strftime("%Y%m%d")
        # Check whether the daily-updated upstream data is ready yet
        # The upstream stats table currently updates daily, but its fields are designed to also support hourly granularity
        h_data_count = check_data_partition(ODS_PROJECT, GH_REPLY_STATS_TABLE, last_dt, '00')
        if h_data_count > 0:
            LOGGER.info('Upstream table row count = {}, starting computation'.format(h_data_count))
-            run_dt = now_date.strftime("%Y%m%d")
-            LOGGER.info(f'run_dt: {run_dt}, run_hour: {now_hour}')
-            build_and_transfer_data(run_dt, now_hour, ODS_PROJECT,
+            run_dt = run_date.strftime("%Y%m%d")
+            run_hour = run_date.strftime("%H")
+            LOGGER.info(f'run_dt: {run_dt}, run_hour: {run_hour}')
+            build_and_transfer_data(run_dt, run_hour, ODS_PROJECT,
                                    dry_run=args.dry_run)
            LOGGER.info('Data update finished')
        else: