# export_24h_vid.py - export past-24h video feature data from ODPS to a CSV file.
import os
import sys
import traceback

import pandas as pd

from utils import get_data_from_odps, send_msg_to_feishu
from config import set_config
from log import Log

  7. config_, _ = set_config()
  8. log_ = Log()
  9. features = [
  10. 'apptype',
  11. 'videoid',
  12. 'preview人数', # 过去24h预曝光人数
  13. 'view人数', # 过去24h曝光人数
  14. 'play人数', # 过去24h播放人数
  15. 'share人数', # 过去24h分享人数
  16. '回流人数', # 过去24h分享,过去24h回流人数
  17. 'preview次数', # 过去24h预曝光次数
  18. 'view次数', # 过去24h曝光次数
  19. 'play次数', # 过去24h播放次数
  20. 'share次数', # 过去24h分享次数
  21. 'platform_return',
  22. 'platform_preview',
  23. 'platform_preview_total',
  24. 'platform_show',
  25. 'platform_show_total',
  26. 'platform_view',
  27. 'platform_view_total',
  28. ]
  29. def get_feature_data(project, table, now_date):
  30. """获取特征数据"""
  31. # dt = datetime.datetime.strftime(now_date, '%Y%m%d%H')
  32. # dt = '2022041310'
  33. records = get_data_from_odps(date=now_date, project=project, table=table)
  34. feature_data = []
  35. for record in records:
  36. item = {}
  37. for feature_name in features:
  38. item[feature_name] = record[feature_name]
  39. feature_data.append(item)
  40. feature_df = pd.DataFrame(feature_data)
  41. return feature_df
  42. if __name__ == "__main__":
  43. try:
  44. project = config_.PROJECT_24H_APP_TYPE
  45. table = config_.TABLE_24H_APP_TYPE
  46. now_date = sys.argv[1]
  47. log_.info(f"now date: {now_date}")
  48. data = get_feature_data(project=project, table=table, now_date=now_date)
  49. data = data.fillna(0)
  50. data.to_csv(f"./data/24h_video_data_{now_date}.csv", index=False)
  51. log_.info(f"24h video data shape: {data.shape}")
  52. except Exception as e:
  53. log_.error(f"rank 24h数据下载失败, exception: {e}, traceback: {traceback.format_exc()}")
  54. send_msg_to_feishu(
  55. webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
  56. key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
  57. msg_text=f"rov-offline{config_.ENV_TEXT} - rank 24h数据下载失败\n"
  58. f"exception: {e}\n"
  59. f"traceback: {traceback.format_exc()}"
  60. )