cal_24h_score.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
# coding: utf-8
  2. import sys
  3. import math
  4. import traceback
  5. import pandas as pd
  6. from utils import send_msg_to_feishu
  7. from config import set_config
  8. from log import Log
# Load the project configuration. set_config() returns a pair; only the first
# element is kept and the second is discarded.
config_, _ = set_config()
# Module-wide logger instance from the project's log module.
log_ = Log()
  11. features = [
  12. 'apptype',
  13. 'videoid',
  14. 'preview人数', # 过去24h预曝光人数
  15. 'view人数', # 过去24h曝光人数
  16. 'play人数', # 过去24h播放人数
  17. 'share人数', # 过去24h分享人数
  18. '回流人数', # 过去24h分享,过去24h回流人数
  19. 'preview次数', # 过去24h预曝光次数
  20. 'view次数', # 过去24h曝光次数
  21. 'play次数', # 过去24h播放次数
  22. 'share次数', # 过去24h分享次数
  23. 'platform_return',
  24. 'platform_preview',
  25. 'platform_preview_total',
  26. 'platform_show',
  27. 'platform_show_total',
  28. 'platform_view',
  29. 'platform_view_total',
  30. ]
  31. def data_group(data_path):
  32. """将数据按照videoid聚合(求和)"""
  33. f = open(data_path)
  34. index = 0
  35. data_dict = {}
  36. while True:
  37. line = f.readline()
  38. if not line:
  39. break
  40. if index == 0:
  41. index += 1
  42. continue
  43. index += 1
  44. items = line.strip().split(",")
  45. # print(items)
  46. if len(items) < len(features):
  47. continue
  48. video_id = items[1]
  49. if video_id not in data_dict:
  50. data_dict[video_id] = {'videoid': video_id}
  51. for i, feature in enumerate(features):
  52. if feature in ['apptype', 'videoid']:
  53. continue
  54. data_dict[video_id][feature] = int(float(items[i]))
  55. else:
  56. for i, feature in enumerate(features):
  57. if feature in ['apptype', 'videoid']:
  58. continue
  59. data_dict[video_id][feature] = data_dict[video_id][feature] + int(float(items[i]))
  60. f.close()
  61. data_list = [item for video_id, item in data_dict.items()]
  62. data_df = pd.DataFrame(data_list)
  63. return data_df
  64. def cal_score(data_df):
  65. """计算score"""
  66. df = data_df.copy()
  67. # share_rate_view = (share+1)/(view+1000)
  68. df['share_rate_view'] = (df['share人数'] + 1) / (df['view人数'] + 1000)
  69. # back_rate = (return+1)/(share+10)
  70. df['back_rate'] = (df['回流人数'] + 1) / (df['share人数'] + 10)
  71. df['log_back'] = (df['回流人数'] + 1).apply(math.log)
  72. # score1 = 回流/(view+10)
  73. df['24h_score1'] = df['回流人数'] / (df['view人数'] + 10)
  74. # score2 = share/view * back_rate * logback
  75. df['24h_score2'] = df['share_rate_view'] * df['back_rate'] * df['log_back']
  76. score_df = df[['videoid', '24h_score1', '24h_score2']]
  77. # print(score_df)
  78. return score_df
  79. if __name__ == "__main__":
  80. try:
  81. now_date = sys.argv[1]
  82. log_.info(f"now_date: {now_date}")
  83. data_path = f"./data/24h_video_data_{now_date}.csv"
  84. data_df = data_group(data_path=data_path)
  85. log_.info(f"24h data_df shape: {data_df.shape}")
  86. hour_score_path = f"./data/24h_score_{now_date}.csv"
  87. score_df = cal_score(data_df=data_df)
  88. score_df.to_csv(hour_score_path, index=False)
  89. log_.info(f"24h score_df shape: {score_df.shape}")
  90. except Exception as e:
  91. log_.error(f"rank 24h分值更新失败, exception: {e}, traceback: {traceback.format_exc()}")
  92. send_msg_to_feishu(
  93. webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
  94. key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
  95. msg_text=f"rov-offline{config_.ENV_TEXT} - rank 24h分值更新失败\n"
  96. f"exception: {e}\n"
  97. f"traceback: {traceback.format_exc()}"
  98. )