浏览代码

update user_group_update & ad_user_video_predict & config

liqian 2 年之前
父节点
当前提交
64e2bd13bd
共有 3 个文件被更改,包括 82 次插入和 57 次删除
  1. ad_user_video_predict.py (+4 -3)
  2. config.py (+29 -26)
  3. user_group_update.py (+49 -28)

+ 4 - 3
ad_user_video_predict.py

@@ -64,8 +64,9 @@ def predict_video_share_rate(dt, app_type):
 def predict_ad_group_video(dt, config_key, config_param):
 def predict_ad_group_video(dt, config_key, config_param):
     log_.info(f"config_key = {config_key} update start ...")
     log_.info(f"config_key = {config_key} update start ...")
     # 获取用户组预测值
     # 获取用户组预测值
-    user_data_key = config_param['data'].get('user')
-    group_key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{user_data_key}:{dt}"
+    user_data_key = config_param['user'].get('data')
+    user_rule_key = config_param['user'].get('rule')
+    group_key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{user_data_key}:{user_rule_key}:{dt}"
     group_data = redis_helper.get_all_data_from_zset(key_name=group_key_name, with_scores=True)
     group_data = redis_helper.get_all_data_from_zset(key_name=group_key_name, with_scores=True)
     if group_data is None:
     if group_data is None:
         log_.info(f"group data is None!")
         log_.info(f"group data is None!")
@@ -74,7 +75,7 @@ def predict_ad_group_video(dt, config_key, config_param):
     log_.info(f"group_df count = {len(group_df)}")
     log_.info(f"group_df count = {len(group_df)}")
 
 
     # 获取视频预测值
     # 获取视频预测值
-    video_data_key = config_param['data'].get('video')
+    video_data_key = config_param['video'].get('data')
     video_key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{video_data_key}:{dt}"
     video_key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{video_data_key}:{dt}"
     video_data = redis_helper.get_all_data_from_zset(key_name=video_key_name, with_scores=True)
     video_data = redis_helper.get_all_data_from_zset(key_name=video_key_name, with_scores=True)
     if video_data is None:
     if video_data is None:

+ 29 - 26
config.py

@@ -641,16 +641,6 @@ class BaseConfig(object):
         'data5': APP_TYPE['LAO_HAO_KAN_VIDEO'],  # 老好看视频
         'data5': APP_TYPE['LAO_HAO_KAN_VIDEO'],  # 老好看视频
         'data6': APP_TYPE['ZUI_JING_QI'],  # 票圈最惊奇
         'data6': APP_TYPE['ZUI_JING_QI'],  # 票圈最惊奇
     }
     }
-    # 广告模型用户数据
-    AD_USER_DATA_PARAMS = {
-        'data1': APP_TYPE['VLOG'],  # vlog
-        'data1:1': APP_TYPE['VLOG'],  # vlog 调整未分组用户的阈值(去除不出广告的用户组)
-        'data2': APP_TYPE['LOVE_LIVE'],  # 票圈视频
-        'data3': APP_TYPE['LONG_VIDEO'],  # 内容精选
-        'data4': APP_TYPE['SHORT_VIDEO'],  # 票圈短视频
-        'data5': APP_TYPE['LAO_HAO_KAN_VIDEO'],  # 老好看视频
-        'data6': APP_TYPE['ZUI_JING_QI'],  # 票圈最惊奇
-    }
     # 广告模型用户分组类别
     # 广告模型用户分组类别
     AD_MID_GROUP = {
     AD_MID_GROUP = {
         'class1': [
         'class1': [
@@ -709,44 +699,57 @@ class BaseConfig(object):
             {'data': 'data4', 'rule': 'rule1'},
             {'data': 'data4', 'rule': 'rule1'},
             {'data': 'data1', 'rule': 'rule3'},
             {'data': 'data1', 'rule': 'rule3'},
         ]
         ]
-
     }
     }
     # 广告模型abtest配置
     # 广告模型abtest配置
     AD_ABTEST_CONFIG = {
     AD_ABTEST_CONFIG = {
         # 票圈vlog
         # 票圈vlog
-        '173-a': {'data': {'video': 'data1', 'user': 'data1'},
+        '173-a': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule1'},
                   'threshold': {'group': 7 / 24, 'mean_group': 7 / 24}},
                   'threshold': {'group': 7 / 24, 'mean_group': 7 / 24}},
-        '173-b': {'data': {'video': 'data1', 'user': 'data1:1'},
+        '173-b': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule2'},
                   'threshold': {'group': 7 / 24, 'mean_group': 7 / 24}},
                   'threshold': {'group': 7 / 24, 'mean_group': 7 / 24}},
         # 票圈视频+
         # 票圈视频+
-        '190-a': {'data': {'video': 'data1', 'user': 'data1'},
+        '190-a': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule1'},
                   'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
                   'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
-        '190-b': {'data': {'video': 'data1', 'user': 'data1'},
+        '190-b': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule1'},
                   'threshold': {'group': 13 / 24, 'mean_group': 13 / 24}},
                   'threshold': {'group': 13 / 24, 'mean_group': 13 / 24}},
         # 票圈视频
         # 票圈视频
-        '194-a': {'data': {'video': 'data1', 'user': 'data1'},
+        '194-a': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule1'},
                   'threshold': {'group': 7 / 24, 'mean_group': 7 / 24}},
                   'threshold': {'group': 7 / 24, 'mean_group': 7 / 24}},
-        '194-b': {'data': {'video': 'data1', 'user': 'data1:1'},
+        '194-b': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule2'},
                   'threshold': {'group': 7 / 24, 'mean_group': 7 / 24}},
                   'threshold': {'group': 7 / 24, 'mean_group': 7 / 24}},
         # 内容精选
         # 内容精选
-        '195-a': {'data': {'video': 'data1', 'user': 'data1'},
+        '195-a': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule1'},
                   'threshold': {'group': 11 / 24, 'mean_group': 11 / 24}},
                   'threshold': {'group': 11 / 24, 'mean_group': 11 / 24}},
-        '195-b': {'data': {'video': 'data1', 'user': 'data1:1'},
+        '195-b': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule2'},
                   'threshold': {'group': 11 / 24, 'mean_group': 11 / 24}},
                   'threshold': {'group': 11 / 24, 'mean_group': 11 / 24}},
         # 票圈短视频
         # 票圈短视频
-        '196-a': {'data': {'video': 'data1', 'user': 'data1'},
+        '196-a': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule1'},
                   'threshold': {'group': 1 / 2, 'mean_group': 1 / 2}},
                   'threshold': {'group': 1 / 2, 'mean_group': 1 / 2}},
-        '196-b': {'data': {'video': 'data4', 'user': 'data4'},
+        '196-b': {'video': {'data': 'data4'},
+                  'user': {'data': 'data4', 'rule': 'rule1'},
                   'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
                   'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
         # 老好看视频
         # 老好看视频
-        '197-a': {'data': {'video': 'data1', 'user': 'data1'},
+        '197-a': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule1'},
                   'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
                   'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
-        '197-b': {'data': {'video': 'data1', 'user': 'data1:1'},
+        '197-b': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule2'},
                   'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
                   'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
         # 票圈最惊奇
         # 票圈最惊奇
-        '198-a': {'data': {'video': 'data1', 'user': 'data1'},
+        '198-a': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule1'},
                   'threshold': {'group': 49 / 96, 'mean_group': 49 / 96}},
                   'threshold': {'group': 49 / 96, 'mean_group': 49 / 96}},
-        '198-b': {'data': {'video': 'data1', 'user': 'data1'},
+        '198-b': {'video': {'data': 'data1'},
+                  'user': {'data': 'data1', 'rule': 'rule1'},
                   'threshold': {'group': 5 / 18, 'mean_group': 5 / 18}},
                   'threshold': {'group': 5 / 18, 'mean_group': 5 / 18}},
     }
     }
 
 
@@ -754,7 +757,7 @@ class BaseConfig(object):
     KEY_NAME_PREFIX_AD_GROUP = 'ad:users:group:predict:share:rate:'
     KEY_NAME_PREFIX_AD_GROUP = 'ad:users:group:predict:share:rate:'
     # 视频有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:video:predict:share:rate:{video_data_key}:{date}
     # 视频有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:video:predict:share:rate:{video_data_key}:{date}
     KEY_NAME_PREFIX_AD_VIDEO = 'ad:video:predict:share:rate:'
     KEY_NAME_PREFIX_AD_VIDEO = 'ad:video:predict:share:rate:'
-    # 用户分组结果存放 redis key 前缀,完整格式:mid:group:{mid}
+    # 用户分组结果存放 redis key 前缀,完整格式:mid:group:{class_key}:{mid}
     KEY_NAME_PREFIX_MID_GROUP = 'mid:group:'
     KEY_NAME_PREFIX_MID_GROUP = 'mid:group:'
     # 广告推荐阈值结果存放 redis key 前缀,完整格式:ad:threshold:{abtestId}:{abtestConfigTag}:{group}
     # 广告推荐阈值结果存放 redis key 前缀,完整格式:ad:threshold:{abtestId}:{abtestConfigTag}:{group}
     KEY_NAME_PREFIX_AD_THRESHOLD = 'ad:threshold:'
     KEY_NAME_PREFIX_AD_THRESHOLD = 'ad:threshold:'

+ 49 - 28
user_group_update.py

@@ -1,4 +1,5 @@
 import datetime
 import datetime
+import logging
 import multiprocessing
 import multiprocessing
 import time
 import time
 import traceback
 import traceback
@@ -11,57 +12,73 @@ config_, _ = set_config()
 log_ = Log()
 log_ = Log()
 redis_helper = RedisHelper()
 redis_helper = RedisHelper()
 
 
-features = [
-    'apptype',
-    'return1mids',
-    'return2_3mids',
-    'return4_8mids',
-    'return9_24mids',
-    'return25_nmids',
-    'return0share1mids',
-    'return0share2_nmids'
-]
+# features = [
+#     'apptype',
+#     'return1mids',
+#     'return2_3mids',
+#     'return4_8mids',
+#     'return9_24mids',
+#     'return25_nmids',
+#     'return0share1mids',
+#     'return0share2_nmids'
+# ]
 
 
 
 
-def to_redis(group, mid_list):
+def to_redis(group, mid_list, class_key_list):
     log_.info(f"group = {group} update redis start ...")
     log_.info(f"group = {group} update redis start ...")
     start_time = time.time()
     start_time = time.time()
     log_.info(f"mid count = {len(mid_list)}")
     log_.info(f"mid count = {len(mid_list)}")
-    for i in range(len(mid_list) // 100 + 1):
-        # log_.info(f"i = {i}")
-        mid_temp_list = mid_list[i * 100:(i + 1) * 100]
-        task_list = [
-            gevent.spawn(redis_helper.set_data_to_redis,
-                         f"{config_.KEY_NAME_PREFIX_MID_GROUP}{mid}", group, 26 * 3600)
-            for mid in mid_temp_list
-        ]
-        gevent.joinall(task_list)
+    for class_key in class_key_list:
+        for i in range(len(mid_list) // 100 + 1):
+            # log_.info(f"i = {i}")
+            mid_temp_list = mid_list[i * 100:(i + 1) * 100]
+            print(mid_temp_list)
+            task_list = [
+                gevent.spawn(redis_helper.set_data_to_redis,
+                             f"{config_.KEY_NAME_PREFIX_MID_GROUP}{class_key}:{mid}", group, 26 * 3600)
+                for mid in mid_temp_list
+            ]
+            gevent.joinall(task_list)
     log_.info(f"group = {group}, mid count = {len(mid_list)}, update redis finished! "
     log_.info(f"group = {group}, mid count = {len(mid_list)}, update redis finished! "
               f"execute time = {(time.time() - start_time) / 60}min")
               f"execute time = {(time.time() - start_time) / 60}min")
 
 
 
 
-def update_user_group_to_redis(project, table, dt, app_type_list):
+def update_user_group_to_redis(project, table, dt, app_type_list, features, ad_mid_group_key_params):
     """更新mid对应分组到redis中"""
     """更新mid对应分组到redis中"""
     # 获取用户分组数据
     # 获取用户分组数据
-    feature_df = get_feature_data(project=project, table=table, features=features, dt=dt)
+    feature_df = get_feature_data(project=project, table=table, features=features[:-2], dt=dt)
     feature_df['apptype'] = feature_df['apptype'].astype(int)
     feature_df['apptype'] = feature_df['apptype'].astype(int)
-    # feature_df = feature_df[feature_df['apptype'] == app_type]
     feature_df = feature_df[feature_df['apptype'].isin(app_type_list)]
     feature_df = feature_df[feature_df['apptype'].isin(app_type_list)]
     print(len(feature_df))
     print(len(feature_df))
-    group_list = features[1:]
-    pool = multiprocessing.Pool(processes=len(group_list))
-    for group in group_list:
+    # group_list = features[1:]
+    pool = multiprocessing.Pool(processes=len(ad_mid_group_key_params))
+    for group, class_key_list in ad_mid_group_key_params.items():
         mid_list = feature_df[group].tolist()
         mid_list = feature_df[group].tolist()
         mid_list = list(set(mid_list))
         mid_list = list(set(mid_list))
         mid_list = [mid for mid in mid_list if mid is not None]
         mid_list = [mid for mid in mid_list if mid is not None]
-        pool.apply_async(func=to_redis, args=(group, mid_list))
+        # class_key_list = ad_mid_group_key_params.get(group)
+        pool.apply_async(func=to_redis, args=(group, mid_list, class_key_list))
     pool.close()
     pool.close()
     pool.join()
     pool.join()
 
 
 
 
+def get_group_keys_mapping(ad_mid_group):
+    ad_mid_group_key_params = {}
+    features = ['apptype']
+    for class_key, group_list in ad_mid_group.items():
+        for group in group_list:
+            if group not in features:
+                features.append(group)
+                ad_mid_group_key_params[group] = [class_key]
+            else:
+                ad_mid_group_key_params[group].append(class_key)
+    return features, ad_mid_group_key_params
+
+
 def timer_check():
 def timer_check():
     try:
     try:
         app_type_list = config_.AD_APP_TYPE_LIST
         app_type_list = config_.AD_APP_TYPE_LIST
+        ad_mid_group = config_.AD_MID_GROUP
         project = config_.ad_model_data['user_group'].get('project')
         project = config_.ad_model_data['user_group'].get('project')
         table = config_.ad_model_data['user_group'].get('table')
         table = config_.ad_model_data['user_group'].get('table')
         now_date = datetime.datetime.today()
         now_date = datetime.datetime.today()
@@ -72,8 +89,12 @@ def timer_check():
         data_count = data_check(project=project, table=table, dt=dt)
         data_count = data_check(project=project, table=table, dt=dt)
         if data_count > 0:
         if data_count > 0:
             log_.info(f"user group data count = {data_count}")
             log_.info(f"user group data count = {data_count}")
+            # 获取features & 用户分组对应key
+            features, ad_mid_group_key_params = get_group_keys_mapping(ad_mid_group=ad_mid_group)
+            log_.info(f"features = {features}, \nad_mid_group_key_params = {ad_mid_group_key_params}")
             # 数据准备好,进行更新
             # 数据准备好,进行更新
-            update_user_group_to_redis(project=project, table=table, dt=dt, app_type_list=app_type_list)
+            update_user_group_to_redis(project=project, table=table, dt=dt, app_type_list=app_type_list,
+                                       features=features, ad_mid_group_key_params=ad_mid_group_key_params)
             log_.info(f"user group data update end!")
             log_.info(f"user group data update end!")
         # elif now_min > 45:
         # elif now_min > 45:
         #     log_.info('user group data is None!')
         #     log_.info('user group data is None!')