import os
import pickle

from odps import ODPS

from config import set_config

config_ = set_config()

# SECURITY NOTE(review): these credentials are committed to source control.
# They are kept only as a backward-compatible fallback; rotate them and
# supply the real values through the environment variables read in
# get_data_from_odps() instead, then delete these literals.
_DEFAULT_ODPS_ACCESS_ID = 'LTAI4FtW5ZzxMvdw35aNkmcp'
_DEFAULT_ODPS_SECRET_ACCESS_KEY = '0VKnydcaHK3ITjylbgUsLubX6rwiwc'


def get_data_from_odps(date, project, table,
                       connect_timeout=3000, read_timeout=500000,
                       pool_maxsize=1000, pool_connections=1000):
    """
    Read one date partition of a table from ODPS.

    The access key id / secret are taken from the ``ODPS_ACCESS_ID`` and
    ``ODPS_SECRET_ACCESS_KEY`` environment variables when they are set,
    otherwise the module-level fallback values are used.

    :param date: partition date, string formatted as '%Y%m%d'
    :param project: ODPS project name, string
    :param table: table name, string
    :param connect_timeout: connection timeout passed to the ODPS client
    :param read_timeout: read timeout passed to the ODPS client
    :param pool_maxsize: passed through to the ODPS client
    :param pool_connections: passed through to the ODPS client
    :return: the records returned by ``ODPS.read_table`` for partition
        ``dt=<date>``
    """
    odps = ODPS(
        access_id=os.environ.get('ODPS_ACCESS_ID', _DEFAULT_ODPS_ACCESS_ID),
        secret_access_key=os.environ.get(
            'ODPS_SECRET_ACCESS_KEY', _DEFAULT_ODPS_SECRET_ACCESS_KEY),
        project=project,
        endpoint='http://service.cn.maxcompute.aliyun.com/api',
        connect_timeout=connect_timeout,
        read_timeout=read_timeout,
        pool_maxsize=pool_maxsize,
        pool_connections=pool_connections,
    )
    # Wrap `date` in a 1-tuple so %-formatting cannot misinterpret a tuple
    # argument as multiple format values.
    records = odps.read_table(name=table, partition='dt=%s' % (date,))
    return records


def write_to_pickle(data, filename, filepath=config_.DATA_DIR_PATH):
    """
    Serialize data into a pickle file.

    :param data: the object to pickle
    :param filename: name of the file to write
    :param filepath: directory that holds the file; defaults to
        config_.DATA_DIR_PATH and is created if it does not exist
    :return: None
    """
    # exist_ok=True avoids the check-then-create race when several writers
    # start at the same time.
    os.makedirs(filepath, exist_ok=True)
    file = os.path.join(filepath, filename)
    with open(file, 'wb') as wf:
        pickle.dump(data, wf)


def read_from_pickle(filename, filepath=config_.DATA_DIR_PATH):
    """
    Load data from a pickle file.

    :param filename: name of the file to read
    :param filepath: directory that holds the file; defaults to
        config_.DATA_DIR_PATH
    :return: the unpickled data, or None when the file does not exist
    """
    file = os.path.join(filepath, filename)
    # Open first and handle "missing" afterwards, so the file cannot vanish
    # between an existence check and the open. Only the open() is guarded:
    # errors raised while unpickling must still propagate.
    try:
        rf = open(file, 'rb')
    except (FileNotFoundError, NotADirectoryError):
        return None
    with rf:
        # NOTE: pickle.load can execute arbitrary code; only read files that
        # were produced by a trusted writer (e.g. write_to_pickle above).
        data = pickle.load(rf)
    return data