# algorithm / rov-offline
import datetime
import logging
import os
from datetime import datetime as dt
from threading import Timer

from odps import ODPS

from config import set_config

# Project configuration object, built once at import time by set_config().
# timer_check() reads the TRAIN_* and PREDICT_* settings from it.
config_ = set_config()


def rov_train_recall_pool_update():
    """Run the shell script that updates the model and produces predictions.

    Intended to be called once both the training data and the prediction
    data are ready. The exit status returned by ``os.system`` is discarded,
    so a failing script is not reported to the caller.
    """
    command = 'sh /data/rov-offline/rov_train_recall_pool_update.sh'
    os.system(command)


def data_check(project, table, date):
    """Return the number of rows in one date partition of an ODPS table.

    Args:
        project: ODPS project name; used both for the connection and as the
            qualifier of the table in the query.
        table: Table name inside ``project``.
        date: Value compared against the ``dt`` column (callers in this file
            pass a ``'%Y%m%d'`` string).

    Returns:
        The reader's record count for the query, or ``0`` when the query
        fails for any reason. Callers treat ``0`` as "data not ready yet",
        so failures are logged rather than raised.
    """
    # NOTE(review): the access key pair is hard-coded in source. It should be
    # moved to configuration or the environment and the exposed key rotated.
    odps = ODPS(
        access_id='LTAI4FtW5ZzxMvdw35aNkmcp',
        secret_access_key='0VKnydcaHK3ITjylbgUsLubX6rwiwc',
        project=project,
        endpoint='http://service.cn.maxcompute.aliyun.com/api',
        connect_timeout=3000,
        read_timeout=500000,
        pool_maxsize=1000,
        pool_connections=1000
    )

    # NOTE(review): `date` is interpolated unquoted, so the comparison relies
    # on how the engine treats a numeric literal against `dt` - confirm the
    # partition column type.
    sql = "select * from {}.{} where dt = {}".format(project, table, date)
    try:
        with odps.execute_sql(sql=sql).open_reader() as reader:
            feature_count = reader.count
    except Exception:
        # Best-effort check: keep reporting "not ready" (0) so the caller
        # retries, but record the failure instead of hiding it. Otherwise a
        # permanent error (bad credentials, missing table) looks identical to
        # data that simply has not arrived yet.
        logging.getLogger(__name__).warning(
            "data_check failed for %s.%s dt=%s; treating as not ready",
            project, table, date, exc_info=True,
        )
        feature_count = 0
    return feature_count


def timer_check():
    """Run the model update once both input partitions contain data.

    Computes the training and prediction partition dates by subtracting
    ``config_.TRAIN_DIFF`` and ``config_.PREDICT_DIFF`` days from today,
    then asks ``data_check`` for the row count of each partition. If either
    count is zero, a ``threading.Timer`` is started to call this function
    again in 60 seconds. Otherwise ``rov_train_recall_pool_update`` is run.
    """
    partition_format = '%Y%m%d'
    today = datetime.datetime.today()

    # Most recent partition expected for the training data.
    train_date = (
        today - datetime.timedelta(days=config_.TRAIN_DIFF)
    ).strftime(partition_format)
    # Most recent partition expected for the prediction data.
    predict_date = (
        today - datetime.timedelta(days=config_.PREDICT_DIFF)
    ).strftime(partition_format)

    # Row count of the training-feature partition.
    train_rows = data_check(
        config_.TRAIN_PROJECT, config_.TRAIN_TABLE, train_date
    )
    # Row count of the prediction-feature partition.
    predict_rows = data_check(
        config_.PREDICT_PROJECT, config_.PREDICT_TABLE, predict_date
    )

    if train_rows == 0 or predict_rows == 0:
        # At least one partition is still empty: check again in one minute.
        Timer(60, timer_check).start()
        return

    # Both partitions have data: update the model and predict.
    rov_train_recall_pool_update()


if __name__ == '__main__':
    # Start the readiness polling when the file is executed as a script.
    timer_check()