# user_group_update.py
  1. import datetime
  2. import logging
  3. import multiprocessing
  4. import time
  5. import traceback
  6. import gevent
  7. from threading import Timer
  8. from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
  9. from my_config import set_config
  10. from log import Log
# Environment-specific configuration; the second return value is unused here.
config_, _ = set_config()
log_ = Log()
redis_helper = RedisHelper()
# NOTE: the hard-coded feature list below is superseded by
# get_group_keys_mapping(), which derives it from config_.AD_MID_GROUP.
# features = [
# 'apptype',
# 'return1mids',
# 'return2_3mids',
# 'return4_8mids',
# 'return9_24mids',
# 'return25_nmids',
# 'return0share1mids',
# 'return0share2_nmids'
# ]
  24. def to_redis(group, mid_list, class_key_list):
  25. log_.info(f"group = {group} update redis start ...")
  26. start_time = time.time()
  27. log_.info(f"mid count = {len(mid_list)}")
  28. for class_key in class_key_list:
  29. for i in range(len(mid_list) // 100 + 1):
  30. # log_.info(f"i = {i}")
  31. mid_temp_list = mid_list[i * 100:(i + 1) * 100]
  32. # print(mid_temp_list)
  33. task_list = [
  34. gevent.spawn(redis_helper.set_data_to_redis,
  35. f"{config_.KEY_NAME_PREFIX_MID_GROUP}{class_key}:{mid}", group, 28 * 3600)
  36. for mid in mid_temp_list
  37. ]
  38. gevent.joinall(task_list)
  39. log_.info(f"group = {group}, mid count = {len(mid_list)}, update redis finished! "
  40. f"execute time = {(time.time() - start_time) / 60}min")
  41. def update_user_group_to_redis(project, table, dt, app_type_list, features, ad_mid_group_key_params):
  42. """更新mid对应分组到redis中"""
  43. # 获取用户分组数据
  44. feature_df = get_feature_data(project=project, table=table, features=features, dt=dt)
  45. feature_df['apptype'] = feature_df['apptype'].astype(int)
  46. feature_df = feature_df[feature_df['apptype'].isin(app_type_list)]
  47. # print(len(feature_df))
  48. # group_list = features[1:]
  49. pool = multiprocessing.Pool(processes=len(ad_mid_group_key_params))
  50. for group, class_key_list in ad_mid_group_key_params.items():
  51. mid_list = feature_df[group].tolist()
  52. mid_list = list(set(mid_list))
  53. mid_list = [mid for mid in mid_list if mid is not None]
  54. # class_key_list = ad_mid_group_key_params.get(group)
  55. pool.apply_async(func=to_redis, args=(group, mid_list, class_key_list))
  56. pool.close()
  57. pool.join()
  58. def get_group_keys_mapping(ad_mid_group):
  59. ad_mid_group_key_params = {}
  60. features = ['apptype']
  61. for class_key, group_list in ad_mid_group.items():
  62. for group in group_list:
  63. if group not in features:
  64. features.append(group)
  65. ad_mid_group_key_params[group] = [class_key]
  66. else:
  67. ad_mid_group_key_params[group].append(class_key)
  68. return features, ad_mid_group_key_params
  69. def timer_check():
  70. try:
  71. app_type_list = config_.AD_APP_TYPE_LIST
  72. ad_mid_group = config_.AD_MID_GROUP
  73. # 用户分组~
  74. project = config_.ad_model_data['user_group'].get('project')
  75. table = config_.ad_model_data['user_group'].get('table')
  76. now_date = datetime.datetime.today()
  77. dt = datetime.datetime.strftime(now_date, '%Y%m%d')
  78. log_.info(f"now_date: {dt}")
  79. now_min = datetime.datetime.now().minute
  80. # 查看当前更新的数据是否已准备好
  81. data_count = data_check(project=project, table=table, dt=dt)
  82. if data_count > 0:
  83. log_.info(f"user group data count = {data_count}")
  84. # 获取features & 用户分组对应key
  85. features, ad_mid_group_key_params = get_group_keys_mapping(ad_mid_group=ad_mid_group)
  86. log_.info(f"features = {features}, \nad_mid_group_key_params = {ad_mid_group_key_params}")
  87. # 数据准备好,进行更新
  88. update_user_group_to_redis(project=project, table=table, dt=dt, app_type_list=app_type_list,
  89. features=features, ad_mid_group_key_params=ad_mid_group_key_params)
  90. log_.info(f"user group data update end!")
  91. send_msg_to_feishu(
  92. webhook=config_.FEISHU_ROBOT['ad_user_group_update_robot'].get('webhook'),
  93. key_word=config_.FEISHU_ROBOT['ad_user_group_update_robot'].get('key_word'),
  94. msg_text=f"\nrov-offline{config_.ENV_TEXT} - 用户分组数据更新完成\n"
  95. )
  96. else:
  97. # 数据没准备好,1分钟后重新检查
  98. Timer(60, timer_check).start()
  99. except Exception as e:
  100. log_.error(f"用户分组数据更新失败, exception: {e}, traceback: {traceback.format_exc()}")
  101. send_msg_to_feishu(
  102. webhook=config_.FEISHU_ROBOT['ad_user_group_update_robot'].get('webhook'),
  103. key_word=config_.FEISHU_ROBOT['ad_user_group_update_robot'].get('key_word'),
  104. msg_text=f"\nrov-offline{config_.ENV_TEXT} - 用户分组数据更新失败\n"
  105. f"exception: {e}\n"
  106. f"traceback: {traceback.format_exc()}"
  107. )
  108. # 5分钟后重试
  109. Timer(5*60, timer_check).start()
if __name__ == '__main__':
    # Entry point: kick off the readiness-check / update loop.
    timer_check()