浏览代码

Merge branch 'feature/20241128-auto-configure-gh'

StrayWarrior 4 月之前
父节点
当前提交
f2f2e0f6ca
共有 1 个文件被更改，包括 14 次插入和 3 次删除
  1. 14 3
      alg_growth_3rd_gh_reply_video_v1.py

+ 14 - 3
alg_growth_3rd_gh_reply_video_v1.py

@@ -240,6 +240,7 @@ def check_result_data(df):
 
 
 def postprocess_override_by_config(df, dt_version):
+    return df
     config = json.load(open("configs/3rd_gh_reply_video.json"))
     override_data = {
         'strategy_key': [],
@@ -274,7 +275,6 @@ def build_and_transfer_base_mode(gh_df, run_dt, run_hour, dt_version, dry_run):
     base_rank = rank_for_base(run_dt, run_hour, ODS_PROJECT, GH_REPLY_STATS_TABLE, ODPS_RANK_RESULT_TABLE,BASE_GROUP_NAME)
 
     final_rank_df = pd.concat([layer1_rank, layer2_rank, base_rank]).reset_index(drop=True)
-
     final_rank_df = postprocess_override_by_config(final_rank_df, dt_version)
     check_result_data(final_rank_df)
 
@@ -284,11 +284,23 @@ def build_and_transfer_base_mode(gh_df, run_dt, run_hour, dt_version, dry_run):
     final_df['sort'] = SEND_N + 1 - final_df['sort']
 
     if dry_run:
+        print("==== ALL DATA ====")
         print(final_df[['strategy_key', 'gh_id', 'sort', 'video_id', 'score', 'title']]
               .sort_values(by=['strategy_key', 'gh_id', 'sort']))
+
+        last_odps_df = get_odps_df_of_max_partition(
+            ODS_PROJECT, ODPS_RANK_RESULT_TABLE, {'ctime': dt_version}
+        ).to_pandas()
+        merged_df = last_odps_df.merge(
+            final_df, on=['strategy_key', 'gh_id', 'sort'], how='outer')
+        delta_df = merged_df.query('title_x != title_y')
+        delta_df = delta_df[['strategy_key', 'gh_id', 'sort',
+                             'title_x', 'score_x', 'title_y', 'score_y']]
+        delta_df.to_csv('tmp_delta_data.csv')
         return
 
     # save to ODPS
+    odps_instance = get_odps_instance(ODS_PROJECT)
     t = odps_instance.get_table(ODPS_RANK_RESULT_TABLE)
     part_spec_dict = {'dt': run_dt, 'hour': run_hour, 'ctime': dt_version}
     part_spec = ','.join(['{}={}'.format(k, part_spec_dict[k]) for k in part_spec_dict.keys()])
@@ -452,8 +464,7 @@ def main():
                 time.sleep(60)
     except Exception as e:
         LOGGER.error(f"数据更新失败, exception: {e}, traceback: {traceback.format_exc()}")
-        return
-        if CONFIG.ENV_TEXT == '开发环境':
+        if CONFIG.ENV_TEXT == '开发环境' or args.dry_run:
             return
         send_msg_to_feishu(
             webhook=CONFIG.FEISHU_ROBOT['growth_task_robot'].get('webhook'),