Bläddra i källkod

Merge branch 'test' of https://git.yishihui.com/algorithm/rov-offline into test

caida 1 år sedan
förälder
incheckning
f61d7cb6e8
6 ändrade filer med 23 tillägg och 24 borttagningar
  1. + 3 - 4
      cal_24h_score.py
  2. + 3 - 3
      cal_hour_score.py
  3. + 7 - 7
      compose_score.py
  4. + 6 - 6
      config.py
  5. + 2 - 2
      export_24h_vid.py
  6. + 2 - 2
      export_hour_vid.py

+ 3 - 4
cal_24h_score.py

@@ -9,7 +9,6 @@ config_, _ = set_config()
 log_ = Log()
 
 
-
 features = [
     'apptype',
     'videoid',
@@ -80,14 +79,14 @@ def cal_score(data_df):
 if __name__ == "__main__":
     try:
         now_date = sys.argv[1]
-        print(f"now_date: {now_date}")
+        log_.info(f"now_date: {now_date}")
         data_path = f"./data/24h_video_data_{now_date}.csv"
         data_df = data_group(data_path=data_path)
-        print(f"24h data_df shape: {data_df.shape}")
+        log_.info(f"24h data_df shape: {data_df.shape}")
         hour_score_path = f"./data/24h_score_{now_date}.csv"
         score_df = cal_score(data_df=data_df)
         score_df.to_csv(hour_score_path, index=False)
-        print(f"24h score_df shape: {score_df.shape}")
+        log_.info(f"24h score_df shape: {score_df.shape}")
     except Exception as e:
         log_.error(f"rank 24h分值更新失败, exception: {e}, traceback: {traceback.format_exc()}")
         send_msg_to_feishu(

+ 3 - 3
cal_hour_score.py

@@ -124,14 +124,14 @@ def cal_score(data_df):
 if __name__ == "__main__":
     try:
         now_date = sys.argv[1]
-        print(f"now_date: {now_date}")
+        log_.info(f"now_date: {now_date}")
         data_path = f"./data/hour_video_data_{now_date}.csv"
         data_df = data_group(data_path=data_path)
-        print(f"hour data_df shape: {data_df.shape}")
+        log_.info(f"hour data_df shape: {data_df.shape}")
         hour_score_path = f"./data/hour_score_{now_date}.csv"
         score_df = cal_score(data_df=data_df)
         score_df.to_csv(hour_score_path, index=False)
-        print(f"hour score_df shape: {score_df.shape}")
+        log_.info(f"hour score_df shape: {score_df.shape}")
     except Exception as e:
         log_.error(f"rank 小时级分值更新失败, exception: {e}, traceback: {traceback.format_exc()}")
         send_msg_to_feishu(

+ 7 - 7
compose_score.py

@@ -23,19 +23,19 @@ def cal_compose_score(score_hour_path, score_24h_path, merge_score_path):
     score_merge_df = pd.merge(score_hour_df, score_24h_df, on='videoid', how='outer')
     score_merge_df.fillna(0, inplace=True)
     # print(score_merge_df)
-    print(f"score_hour_df shape: {score_hour_df.shape}")
-    print(f"score_24h_df shape: {score_24h_df.shape}")
-    print(f"score_merge_df shape: {score_merge_df.shape}")
+    log_.info(f"score_hour_df shape: {score_hour_df.shape}")
+    log_.info(f"score_24h_df shape: {score_24h_df.shape}")
+    log_.info(f"score_merge_df shape: {score_merge_df.shape}")
     score_merge_df['score1'] = score_merge_df['24h_score1'] + score_merge_df['hour_score1']
     score_merge_df['score2'] = score_merge_df['24h_score1'] + score_merge_df['hour_score2']
     # score_merge_df['score3'] = score_merge_df['24h_score1'] + score_merge_df['hour_score3']
     score_merge_df['score4'] = score_merge_df['24h_score1'] + score_merge_df['hour_score4']
     score_merge_df['score5'] = score_merge_df['24h_score1'] + score_merge_df['hour_score5']
     # print(score_merge_df)
-    print(f"score_merge_df shape: {score_merge_df.shape}")
+    log_.info(f"score_merge_df shape: {score_merge_df.shape}")
     score_merge_df.to_csv(merge_score_path, index=False)
     score_df = score_merge_df[['videoid', 'score1', 'score2', 'score4', 'score5']]
-    print(f"score_df shape: {score_merge_df.shape}")
+    log_.info(f"score_df shape: {score_merge_df.shape}")
     return score_df
 
 
@@ -64,7 +64,7 @@ def score_to_redis(score_df):
 if __name__ == '__main__':
     try:
         now_date = sys.argv[1]
-        print("now date:", now_date)
+        log_.info(f"now date: {now_date}")
         score_hour_path = f"./data/hour_score_{now_date}.csv"
         score_24h_path = f"./data/24h_score_{now_date}.csv"
         merge_score_path = f"./data/merge_score_{now_date}.csv"
@@ -72,7 +72,7 @@ if __name__ == '__main__':
             score_hour_path=score_hour_path, score_24h_path=score_24h_path, merge_score_path=merge_score_path
         )
         score_to_redis(score_df=score_df)
-        print("rank score update finished!")
+        log_.info("rank score update finished!")
     except Exception as e:
         log_.error(f"rank 分值合并更新失败, exception: {e}, traceback: {traceback.format_exc()}")
         send_msg_to_feishu(

+ 6 - 6
config.py

@@ -423,7 +423,7 @@ class BaseConfig(object):
         'params_list': [
             {'data': 'data1', 'rule': 'rule4'},  # 095 vlog
             {'data': 'data1', 'rule': 'rule4-1'},  # 095-1
-            {'data': 'data1', 'rule': 'rule4-2'},  # 262 特殊地域屏蔽危险视频
+            # {'data': 'data1', 'rule': 'rule4-2'},  # 262 特殊地域屏蔽危险视频
             # {'data': 'data2', 'rule': 'rule4'},
             {'data': 'data2', 'rule': 'rule7-1'},  # 121 内容精选
             # {'data': 'data3', 'rule': 'rule7'},
@@ -448,14 +448,14 @@ class BaseConfig(object):
             # {'data': 'data1', 'rule': 'rule17'},  # 215 vlog
             # {'data': 'data1', 'rule': 'rule18'},  # 224 vlog
             {'data': 'videos5', 'rule': 'rule7-1'},  # 428 [内容精选]
-            {'data': 'data1', 'rule': 'rule20'},  # 461 vlog 分值计算公式 增加h-2分享当前小时回流数据、h-3分享当前小时回流数据特征
-            {'data': 'data1', 'rule': 'rule21'},  # 462 vlog 分值计算公式 增加[h-3,h-2]之间的回流留存特征
+            # {'data': 'data1', 'rule': 'rule20'},  # 461 vlog 分值计算公式 增加h-2分享当前小时回流数据、h-3分享当前小时回流数据特征
+            # {'data': 'data1', 'rule': 'rule21'},  # 462 vlog 分值计算公式 增加[h-3,h-2]之间的回流留存特征
             {'data': 'data1', 'rule': 'rule22'},  # 463 vlog 分值计算公式 增加h-2分享当前小时回流/h-2分享、h-3分享当前小时回流/h-3分享 特征
-            {'data': 'data1', 'rule': 'rule23'},  # 465 vlog 回流数据使用 分享限制地域,回流不限制地域 统计数据
-            {'data': 'data1', 'rule': 'rule24'},  # 466 vlog 分值计算公式 增加[h-3,h-2]之间的回流留存特征 + 回流数据使用 分享限制地域,回流不限制地域 统计数据
+            # {'data': 'data1', 'rule': 'rule23'},  # 465 vlog 回流数据使用 分享限制地域,回流不限制地域 统计数据
+            # {'data': 'data1', 'rule': 'rule24'},  # 466 vlog 分值计算公式 增加[h-3,h-2]之间的回流留存特征 + 回流数据使用 分享限制地域,回流不限制地域 统计数据
         ],
         'params_list_new': [
-            {'data': 'data10', 'rule': 'rule19'},  # 316 票圈视频 + 召回在线去重
+            # {'data': 'data10', 'rule': 'rule19'},  # 316 票圈视频 + 召回在线去重
         ]
     }
 

+ 2 - 2
export_24h_vid.py

@@ -49,11 +49,11 @@ if __name__ == "__main__":
         project = config_.PROJECT_24H_APP_TYPE
         table = config_.TABLE_24H_APP_TYPE
         now_date = sys.argv[1]
-        print("now date:", now_date)
+        log_.info(f"now date: {now_date}")
         data = get_feature_data(project=project, table=table, now_date=now_date)
         data = data.fillna(0)
         data.to_csv(f"./data/24h_video_data_{now_date}.csv", index=False)
-        print(f"24h video data shape: {data.shape}")
+        log_.info(f"24h video data shape: {data.shape}")
     except Exception as e:
         log_.error(f"rank 24h数据下载失败, exception: {e}, traceback: {traceback.format_exc()}")
         send_msg_to_feishu(

+ 2 - 2
export_hour_vid.py

@@ -59,11 +59,11 @@ if __name__ == "__main__":
         project = config_.PROJECT_REGION_APP_TYPE
         table = config_.TABLE_REGION_APP_TYPE
         now_date = sys.argv[1]
-        print("now date:", now_date)
+        log_.info(f"now date: {now_date}")
         data = get_feature_data(project=project, table=table, now_date=now_date)
         data = data.fillna(0)
         data.to_csv(f"./data/hour_video_data_{now_date}.csv", index=False)
-        print(f"hour video data shape: {data.shape}")
+        log_.info(f"hour video data shape: {data.shape}")
     except Exception as e:
         log_.error(f"rank 小时级数据下载失败, exception: {e}, traceback: {traceback.format_exc()}")
         send_msg_to_feishu(