"""Download ROV feature tables from MaxCompute (ODPS) and pickle them locally.

Running this module as a script fetches a multi-day training window and a
single-day prediction sample, then writes each one to a pickle file in the
current working directory.
"""
import datetime
import pickle
from datetime import datetime as dt

from odps import ODPS
import process_feature
import pandas as pd

# NOTE(security): these credentials are committed in source. They are kept
# unchanged so the script behaves exactly as before, but they should be
# rotated and loaded from a secret store or the environment instead.
_ODPS_ACCESS_ID = 'LTAI4FtW5ZzxMvdw35aNkmcp'
_ODPS_ACCESS_KEY = '0VKnydcaHK3ITjylbgUsLubX6rwiwc'
_ODPS_PROJECT = 'usercdm'
_ODPS_ENDPOINT = 'http://service.cn.maxcompute.aliyun.com/api'

# Partition values and date arguments all use this compact day format.
_DATE_FORMAT = '%Y%m%d'

# How many days before "today" each window ends, and how many daily
# partitions each window spans.
_PREDICT_LAG_DAYS = 1
_TRAIN_LAG_DAYS = 7
_PREDICT_WINDOW_DAYS = 1
_TRAIN_WINDOW_DAYS = 30


def _make_odps_client():
    """Build an ODPS client with the project's connection settings."""
    return ODPS(
        _ODPS_ACCESS_ID,
        _ODPS_ACCESS_KEY,
        _ODPS_PROJECT,
        endpoint=_ODPS_ENDPOINT,
        connect_timeout=3000,
        read_timeout=500000,
        pool_maxsize=1000,
        pool_connections=1000,
    )


def getRovfeaturetable(dt: str, table: str) -> pd.DataFrame:
    """Read one ``dt`` partition of ``table`` into a DataFrame.

    Each record contributes one row holding the columns named in
    ``process_feature.featurename``. The ``'dt'`` column is filled from the
    ``dt`` argument rather than read from the record.

    Note: the ``dt`` parameter shadows the module-level ``dt`` alias for
    ``datetime.datetime`` inside this function. The name is kept so existing
    keyword callers continue to work.

    Args:
        dt: Partition value, interpolated into ``'dt=%s'``.
        table: Name of the table to read.

    Returns:
        A DataFrame with one row per record in the partition.
    """
    partition_day = dt
    odps = _make_odps_client()
    columns = process_feature.featurename

    rows = [
        {
            name: (partition_day if name == 'dt' else record[name])
            for name in columns
        }
        for record in odps.read_table(table, partition='dt=%s' % partition_day)
    ]
    featureArray = pd.DataFrame(rows)
    print(partition_day, table, 'feature table finish')
    return featureArray


def getdatasample(date: str, max_range: int, table: str) -> pd.DataFrame:
    """Fetch ``max_range`` consecutive daily partitions ending at ``date``.

    Partitions are read newest first (``date``, then the day before, and so
    on) and concatenated in that order under a fresh 0..n-1 index.

    Args:
        date: Last day of the window, formatted as ``YYYYMMDD``.
        max_range: Number of daily partitions to read, counting back from
            ``date``.
        table: Name of the table to read.

    Returns:
        The concatenated DataFrame for all requested days.

    Raises:
        ValueError: If ``date`` does not match ``YYYYMMDD``. ``pd.concat``
            also raises it when there is nothing to concatenate, i.e. when
            ``max_range`` is less than 1.
    """
    last_day = dt.strptime(date, _DATE_FORMAT)
    datelist = [
        (last_day - datetime.timedelta(days=offset)).strftime(_DATE_FORMAT)
        for offset in range(max_range)
    ]
    print(datelist)

    frames = [getRovfeaturetable(day, table) for day in datelist]
    # ignore_index renumbers the rows directly; this replaces the previous
    # reset_index + drop('index') sequence, which failed whenever a feature
    # column was itself called 'index'.
    return pd.concat(frames, ignore_index=True)


def process_train_predict_data():
    """Download the training and prediction samples and pickle them.

    The training window covers 30 daily partitions of
    ``rov_feature_add_v1`` ending 7 days before today. The prediction sample
    is the single ``rov_predict_table_add_v1`` partition for yesterday.
    Results are written to ``train_data_all.pickle`` and
    ``predict_data_all.pickle`` in the current working directory.
    """
    today = datetime.date.today()
    predict_day = (
        today - datetime.timedelta(days=_PREDICT_LAG_DAYS)
    ).strftime(_DATE_FORMAT)
    train_day = (
        today - datetime.timedelta(days=_TRAIN_LAG_DAYS)
    ).strftime(_DATE_FORMAT)

    # Read data from MaxCompute (training window first, then prediction).
    train_data = getdatasample(
        train_day, _TRAIN_WINDOW_DAYS, 'rov_feature_add_v1')
    predict_data = getdatasample(
        predict_day, _PREDICT_WINDOW_DAYS, 'rov_predict_table_add_v1')

    # Pickle both samples for later testing.
    with open('train_data_all.pickle', 'wb') as output_file:
        pickle.dump(train_data, output_file)
    with open('predict_data_all.pickle', 'wb') as output_file:
        pickle.dump(predict_data, output_file)


if __name__ == '__main__':
    process_train_predict_data()