# user_group_update.py
  1. import datetime
  2. import multiprocessing
  3. import time
  4. import traceback
  5. import gevent
  6. from threading import Timer
  7. from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
  8. from config import set_config
  9. from log import Log
  10. config_, _ = set_config()
  11. log_ = Log()
  12. redis_helper = RedisHelper()
  13. features = [
  14. 'apptype',
  15. 'return1mids',
  16. 'return2_3mids',
  17. 'return4_8mids',
  18. 'return9_24mids',
  19. 'return25_nmids',
  20. 'return0share1mids',
  21. 'return0share2_nmids'
  22. ]
  23. def to_redis(group, mid_list):
  24. log_.info(f"group = {group} update redis start ...")
  25. start_time = time.time()
  26. log_.info(f"mid count = {len(mid_list)}")
  27. for i in range(len(mid_list) // 100 + 1):
  28. # log_.info(f"i = {i}")
  29. mid_temp_list = mid_list[i * 100:(i + 1) * 100]
  30. task_list = [
  31. gevent.spawn(redis_helper.set_data_to_redis,
  32. f"{config_.KEY_NAME_PREFIX_MID_GROUP}{mid}", group, 25 * 3600)
  33. for mid in mid_temp_list
  34. ]
  35. gevent.joinall(task_list)
  36. log_.info(f"group = {group}, mid count = {len(mid_list)}, update redis finished! "
  37. f"execute time = {(time.time() - start_time) / 60}min")
  38. def update_user_group_to_redis(project, table, dt, app_type):
  39. """更新mid对应分组到redis中"""
  40. # 获取用户分组数据
  41. feature_df = get_feature_data(project=project, table=table, features=features, dt=dt)
  42. feature_df['apptype'] = feature_df['apptype'].astype(int)
  43. feature_df = feature_df[feature_df['apptype'] == app_type]
  44. group_list = features[1:]
  45. pool = multiprocessing.Pool(processes=len(group_list))
  46. for group in group_list:
  47. # log_.info(f"group = {group} update redis start ...")
  48. # start_time = time.time()
  49. mid_list = feature_df[group].tolist()
  50. mid_list = list(set(mid_list))
  51. mid_list = [mid for mid in mid_list if mid is not None]
  52. # log_.info(f"mid count = {len(mid_list)}")
  53. pool.apply_async(func=to_redis, args=(group, mid_list))
  54. # for i in range(len(mid_list)//100+1):
  55. # log_.info(f"i = {i}")
  56. # mid_temp_list = mid_list[i*100:(i+1)*100]
  57. # pool.apply_async(func=to_redis, args=(group, mid_temp_list))
  58. pool.close()
  59. pool.join()
  60. # for mid in mid_list:
  61. # # print(mid)
  62. # key_name = f"{config_.KEY_NAME_PREFIX_MID_GROUP}{mid}"
  63. # redis_helper.set_data_to_redis(key_name=key_name, value=group, expire_time=25 * 3600)
  64. # pool.apply_async(
  65. # func=redis_helper.set_data_to_redis,
  66. # args=(key_name, group, 25 * 3600)
  67. # )
  68. # pool.close()
  69. # pool.join()
  70. # log_.info(f"group = {group}, mid count = {len(mid_list)}, update redis finished! "
  71. # f"execute time = {(time.time()-start_time)/60}min")
  72. def timer_check():
  73. try:
  74. app_type = config_.APP_TYPE['VLOG']
  75. project = config_.ad_model_data['user_group'].get('project')
  76. table = config_.ad_model_data['user_group'].get('table')
  77. now_date = datetime.datetime.today()
  78. dt = datetime.datetime.strftime(now_date, '%Y%m%d')
  79. log_.info(f"now_date: {dt}")
  80. now_min = datetime.datetime.now().minute
  81. # 查看当前更新的数据是否已准备好
  82. data_count = data_check(project=project, table=table, dt=dt)
  83. if data_count > 0:
  84. log_.info(f"user group data count = {data_count}")
  85. # 数据准备好,进行更新
  86. update_user_group_to_redis(project=project, table=table, dt=dt, app_type=app_type)
  87. log_.info(f"user group data update end!")
  88. elif now_min > 45:
  89. log_.info('user group data is None!')
  90. send_msg_to_feishu(
  91. webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
  92. key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
  93. msg_text=f"rov-offline{config_.ENV_TEXT} - 用户分组数据未准备好!\n"
  94. f"traceback: {traceback.format_exc()}"
  95. )
  96. else:
  97. # 数据没准备好,1分钟后重新检查
  98. Timer(60, timer_check).start()
  99. except Exception as e:
  100. log_.error(f"用户分组数据更新失败, exception: {e}, traceback: {traceback.format_exc()}")
  101. send_msg_to_feishu(
  102. webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
  103. key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
  104. msg_text=f"rov-offline{config_.ENV_TEXT} - 用户分组数据更新失败\n"
  105. f"exception: {e}\n"
  106. f"traceback: {traceback.format_exc()}"
  107. )
  108. if __name__ == '__main__':
  109. timer_check()