liqian 3 years ago
parent
commit
6769a3a217
4 changed files with 35 additions and 34 deletions
  1. 3 3
      pool_predict.py
  2. 16 9
      rov_data_check.py
  3. 15 21
      rov_train.py
  4. 1 1
      rov_train_recall_pool_update.sh

+ 3 - 3
pool_predict.py

@@ -130,6 +130,6 @@ if __name__ == '__main__':
         log_.info('{} predict end...'.format(app_name))
         log_.info('{} predict end...'.format(app_name))
     log_.info('flow pool predict end...')
     log_.info('flow pool predict end...')
     # 将日志上传到oss
     # 将日志上传到oss
-    log_cmd = "ossutil cp -r -f {} oss://{}/{}".format(log_.logname, config_.BUCKET_NAME,
-                                                       config_.OSS_FOLDER_LOGS + 'flow_pool/')
-    os.system(log_cmd)
+    # log_cmd = "ossutil cp -r -f {} oss://{}/{}".format(log_.logname, config_.BUCKET_NAME,
+    #                                                    config_.OSS_FOLDER_LOGS + 'flow_pool/')
+    # os.system(log_cmd)

+ 16 - 9
rov_data_check.py

@@ -1,11 +1,13 @@
 import datetime
 import datetime
 import os
 import os
+import traceback
 
 
 from odps import ODPS
 from odps import ODPS
 from datetime import datetime as dt
 from datetime import datetime as dt
 from threading import Timer
 from threading import Timer
 from config import set_config
 from config import set_config
 from log import Log
 from log import Log
+from utils import send_msg_to_feishu
 
 
 config_ = set_config()
 config_ = set_config()
 log_ = Log()
 log_ = Log()
@@ -14,14 +16,14 @@ log_ = Log()
 def rov_train_recall_pool_update():
 def rov_train_recall_pool_update():
     # 训练数据和预测数据都准备好时,更新模型,预测
     # 训练数据和预测数据都准备好时,更新模型,预测
     os.system('sh /data/rov-offline/rov_train_recall_pool_update.sh')
     os.system('sh /data/rov-offline/rov_train_recall_pool_update.sh')
-    # 将日志上传到oss
-    log_cmd = "ossutil cp -r -f {} oss://{}/{}".format(log_.logname, config_.BUCKET_NAME,
-                                                       config_.OSS_FOLDER_LOGS + 'rov_recall_pool/')
-    os.system(log_cmd)
-    # 将data上传到oss
-    data_cmd = "ossutil cp -r -f {} oss://{}/{}".format("/data/rov-offline/data", config_.BUCKET_NAME,
-                                                        config_.OSS_FOLDER_DATA)
-    os.system(data_cmd)
+    # # 将日志上传到oss
+    # log_cmd = "ossutil cp -r -f {} oss://{}/{}".format(log_.logname, config_.BUCKET_NAME,
+    #                                                    config_.OSS_FOLDER_LOGS + 'rov_recall_pool/')
+    # os.system(log_cmd)
+    # # 将data上传到oss
+    # data_cmd = "ossutil cp -r -f {} oss://{}/{}".format("/data/rov-offline/data", config_.BUCKET_NAME,
+    #                                                     config_.OSS_FOLDER_DATA)
+    # os.system(data_cmd)
 
 
 
 
 def data_check(project, table, date):
 def data_check(project, table, date):
@@ -69,4 +71,9 @@ def timer_check():
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
-    timer_check()
+    try:
+        timer_check()
+    except Exception as e:
+        log_.error('ROV召回池更新失败, exception: {}, traceback: {}'.format(e, traceback.format_exc()))
+        send_msg_to_feishu('rov-offline生产环境 - ROV召回池更新失败, exception: {}'.format(e))
+

+ 15 - 21
rov_train.py

@@ -1,7 +1,6 @@
 import os
 import os
 import random
 import random
 import time
 import time
-import traceback
 
 
 import lightgbm as lgb
 import lightgbm as lgb
 import pandas as pd
 import pandas as pd
@@ -11,7 +10,7 @@ from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percent
 
 
 from config import set_config
 from config import set_config
 from utils import read_from_pickle, write_to_pickle, data_normalization, \
 from utils import read_from_pickle, write_to_pickle, data_normalization, \
-    request_post, filter_video_status, send_msg_to_feishu
+    request_post, filter_video_status
 from log import Log
 from log import Log
 from db_helper import RedisHelper, MysqlHelper
 from db_helper import RedisHelper, MysqlHelper
 
 
@@ -211,22 +210,17 @@ def predict_test():
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
-    try:
-        log_.info('rov model train start...')
-        train_start = time.time()
-        train_filename = config_.TRAIN_DATA_FILENAME
-        X, Y, videos, fea = process_data(filename=train_filename)
-        log_.info('X_shape = {}, Y_sahpe = {}'.format(X.shape, Y.shape))
-        train(X, Y, features=fea)
-        train_end = time.time()
-        log_.info('rov model train end, execute time = {}ms'.format((train_end - train_start)*1000))
-
-        log_.info('rov model predict start...')
-        predict_start = time.time()
-        predict()
-        predict_end = time.time()
-        log_.info('rov model predict end, execute time = {}ms'.format((predict_end - predict_start)*1000))
-    except Exception as e:
-        log_.error('ROV召回池更新失败, exception: {}, traceback: {}'.format(e, traceback.format_exc()))
-        send_msg_to_feishu('rov-offline生产环境 - ROV召回池更新失败, exception: {}'.format(e))
-
+    log_.info('rov model train start...')
+    train_start = time.time()
+    train_filename = config_.TRAIN_DATA_FILENAME
+    X, Y, videos, fea = process_data(filename=train_filename)
+    log_.info('X_shape = {}, Y_sahpe = {}'.format(X.shape, Y.shape))
+    train(X, Y, features=fea)
+    train_end = time.time()
+    log_.info('rov model train end, execute time = {}ms'.format((train_end - train_start)*1000))
+
+    log_.info('rov model predict start...')
+    predict_start = time.time()
+    predict()
+    predict_end = time.time()
+    log_.info('rov model predict end, execute time = {}ms'.format((predict_end - predict_start)*1000))

+ 1 - 1
rov_train_recall_pool_update.sh

@@ -1 +1 @@
-python /data/rov-offline/get_data.py && python /data/rov-offline/rov_train.py
+/root/anaconda3/bin/python /data/rov-offline/get_data.py && /root/anaconda3/bin/python /data/rov-offline/rov_train.py