浏览代码

Update alg_growth_3rd_gh_reply_video_v1: output delta data in dry run

StrayWarrior 4 月之前
父节点
当前提交
5f16c81621
共有 1 个文件被更改,包括 11 次插入和 0 次删除
  1. 11 0
      alg_growth_3rd_gh_reply_video_v1.py

+ 11 - 0
alg_growth_3rd_gh_reply_video_v1.py

@@ -284,8 +284,19 @@ def build_and_transfer_base_mode(gh_df, run_dt, run_hour, dt_version, dry_run):
     final_df['sort'] = SEND_N + 1 - final_df['sort']
 
     if dry_run:
+        print("==== ALL DATA ====")
         print(final_df[['strategy_key', 'gh_id', 'sort', 'video_id', 'score', 'title']]
               .sort_values(by=['strategy_key', 'gh_id', 'sort']))
+
+        last_odps_df = get_odps_df_of_max_partition(
+            ODS_PROJECT, ODPS_RANK_RESULT_TABLE, {'ctime': dt_version}
+        ).to_pandas()
+        merged_df = last_odps_df.merge(
+            final_df, on=['strategy_key', 'gh_id', 'sort'], how='outer')
+        delta_df = merged_df.query('title_x != title_y')
+        delta_df = delta_df[['strategy_key', 'gh_id', 'sort',
+                             'title_x', 'score_x', 'title_y', 'score_y']]
+        delta_df.to_csv('tmp_delta_data.csv')
         return
 
     # save to ODPS