Explorar o código

add rov data check

liqian hai 3 anos
pai
achega
20f0096f8e
Modificáronse 1 ficheiros con 62 adicións e 0 borrados
  1. 62 0
      rov_data_check.py

+ 62 - 0
rov_data_check.py

@@ -0,0 +1,62 @@
+import datetime
+import logging
+import os
+from datetime import datetime as dt
+from threading import Timer
+
+from odps import ODPS
+
+from config import set_config
+
+# Project settings object; timer_check() reads its TRAIN_* and PREDICT_* attributes.
+config_ = set_config()
+
+
+def rov_train_recall_pool_update():
+    # 训练数据和预测数据都准备好时,更新模型,预测
+    os.system('sh /data/rov-offline/rov_train_recall_pool_update.sh')
+
+
+def data_check(project, table, date):
+    odps = ODPS(
+        access_id='LTAI4FtW5ZzxMvdw35aNkmcp',
+        secret_access_key='0VKnydcaHK3ITjylbgUsLubX6rwiwc',
+        project=project,
+        endpoint='http://service.cn.maxcompute.aliyun.com/api',
+        connect_timeout=3000,
+        read_timeout=500000,
+        pool_maxsize=1000,
+        pool_connections=1000
+    )
+
+    try:
+        sql = "select * from {}.{} where dt = {}".format(project, table, date)
+        with odps.execute_sql(sql=sql).open_reader() as reader:
+            feature_count = reader.count
+    except Exception as e:
+        feature_count = 0
+    return feature_count
+
+
+def timer_check():
+    # 当前日期
+    now_date = datetime.datetime.today()
+    # 训练数据 最近日期分区
+    train_dt = now_date - datetime.timedelta(days=config_.TRAIN_DIFF)
+    train_date = dt.strftime(train_dt, '%Y%m%d')
+    # 预测数据 最近日期分区
+    predict_dt = now_date - datetime.timedelta(days=config_.PREDICT_DIFF)
+    predict_date = dt.strftime(predict_dt, '%Y%m%d')
+    # 查看训练数据特征是否准备好
+    train_feature_count = data_check(config_.TRAIN_PROJECT, config_.TRAIN_TABLE, train_date)
+    # 查看训练数据特征是否准备好
+    predict_feature_count = data_check(config_.PREDICT_PROJECT, config_.PREDICT_TABLE, predict_date)
+
+    # 数据未准备好,1分钟后重新检查
+    if train_feature_count == 0 or predict_feature_count == 0:
+        # 数据未准备好,1分钟后重新检查
+        Timer(60, timer_check).start()
+    else:
+        # 数据准备好,更新模型,预测
+        rov_train_recall_pool_update()
+
+
+# Script entry point: run the first readiness check; timer_check() re-schedules itself while data is missing.
+if __name__ == '__main__':
+    timer_check()