import sys
import traceback

import pandas as pd

from utils import get_data_from_odps, send_msg_to_feishu
from config import set_config
from log import Log

config_, _ = set_config()
log_ = Log()

# Columns read from every ODPS record, in the order they are written to the CSV.
features = [
    'apptype',
    'videoid',
    'preview人数',  # users with a pre-exposure (preview) in the past 24h
    'view人数',  # users with an exposure (view) in the past 24h
    'play人数',  # users who played in the past 24h
    'share人数',  # users who shared in the past 24h
    '回流人数',  # users who returned in the past 24h via shares made in the past 24h
    'preview次数',  # pre-exposure (preview) count in the past 24h
    'view次数',  # exposure (view) count in the past 24h
    'play次数',  # play count in the past 24h
    'share次数',  # share count in the past 24h
    'platform_return',
    'platform_preview',
    'platform_preview_total',
    'platform_show',
    'platform_show_total',
    'platform_view',
    'platform_view_total',
]


def get_feature_data(project, table, now_date):
    """Fetch the feature columns from an ODPS table as a DataFrame.

    Args:
        project: ODPS project name, forwarded to ``get_data_from_odps``.
        table: ODPS table name, forwarded to ``get_data_from_odps``.
        now_date: Value forwarded as ``date`` to ``get_data_from_odps``.
            Presumably a partition string formatted as ``%Y%m%d%H``
            (e.g. ``'2022041310'``) -- TODO confirm against the helper.

    Returns:
        A ``pandas.DataFrame`` with one row per record and exactly the columns
        listed in ``features`` (in that order). When no records are returned
        the frame is empty but still carries those columns.
    """
    records = get_data_from_odps(date=now_date, project=project, table=table)
    feature_data = [
        {feature_name: record[feature_name] for feature_name in features}
        for record in records
    ]
    # Passing the column list explicitly keeps the schema (and therefore the
    # CSV header) stable even when the query yields zero records.
    return pd.DataFrame(feature_data, columns=features)


if __name__ == "__main__":
    try:
        project = config_.PROJECT_24H_APP_TYPE
        table = config_.TABLE_24H_APP_TYPE
        # The date argument is read inside the try block so that a missing
        # argument is reported through the same log/alert path.
        now_date = sys.argv[1]
        log_.info(f"now date: {now_date}")
        data = get_feature_data(project=project, table=table, now_date=now_date)
        data = data.fillna(0)
        data.to_csv(f"./data/24h_video_data_{now_date}.csv", index=False)
        log_.info(f"24h video data shape: {data.shape}")
    except Exception as e:
        # Format the traceback once and reuse it for both the log and the alert.
        tb_text = traceback.format_exc()
        log_.error(f"rank 24h数据下载失败, exception: {e}, traceback: {tb_text}")
        send_msg_to_feishu(
            webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
            key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
            msg_text=f"rov-offline{config_.ENV_TEXT} - rank 24h数据下载失败\n"
                     f"exception: {e}\n"
                     f"traceback: {tb_text}"
        )