yangxiaohui vor 1 Jahr
Ursprung
Commit
3ed36ec110
3 geänderte Dateien mit 38 neuen und 19 gelöschten Zeilen
  1. 6 6
      feature.py
  2. 3 1
      get_ad_out_sample_v2_item.py
  3. 29 12
      get_ad_out_sample_v2_user.py

+ 6 - 6
feature.py

@@ -54,6 +54,12 @@ item_dense_conf = [
 ]
 
 user_sparse_conf = [
+'u_brand',
+'u_device',
+'u_system',
+'u_system_ver',
+'ctx_region',
+'ctx_city',
 # 统计特征_用户
 'u_1day_exp_cnt',
 'u_1day_click_cnt',
@@ -128,16 +134,10 @@ label_col = 'ui_is_out'
 
 sparse_fea_cols = [
 # 'u_id',
-'u_brand',
-'u_device',
-'u_system',
-'u_system_ver',
 # 基础特征_场景
 #'ctx_day',
 'ctx_week',
 'ctx_hour',
-'ctx_region',
-'ctx_city',
 # 基础特征_交叉
 #'ui_is_out',
 #'playtime',

+ 3 - 1
get_ad_out_sample_v2_item.py

@@ -5,6 +5,7 @@ import json
 
 import traceback
 from threading import Timer
+from tqdm import tqdm
 from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
 from config import set_config
 from log import Log
@@ -225,7 +226,7 @@ from candidate_item
     mean_item_h = 0.0
     count_item_h = 0
     with data.open_reader() as reader:
-        for row in reader:
+        for row in tqdm(reader):
             k = str(row['i_id'])
             item_features = get_item_features(row)
             item_h = lr_model.predict_h(item_features)
@@ -238,6 +239,7 @@ from candidate_item
     mean_item_h = mean_item_h / count_item_h 
     item_h_dict['mean'] = mean_item_h 
     print(mean_item_h)
+    print(count_item_h)
     k = 'mean'
     redis_helper.set_data_to_redis(f"{key_name_prefix}:{k}", mean_item_h, 28 * 3600)
     with open('{}.json'.format(key_name_prefix), 'w') as fout:

+ 29 - 12
get_ad_out_sample_v2_user.py

@@ -5,6 +5,7 @@ import json
 
 import traceback
 from threading import Timer
+from tqdm import tqdm
 from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
 from config import set_config
 from log import Log
@@ -127,6 +128,12 @@ and apptype != '13'
 ), candidate_user as (
     SELECT 
     u_id,
+    max(u_brand) as u_brand,
+    max(u_device) as u_device,
+    max(u_system) as u_system,
+    max(u_system_ver) as u_system_ver,
+    max(ctx_region) as ctx_region,
+    max(ctx_city) as ctx_city,
     max(u_1day_exp_cnt) as u_1day_exp_cnt,
     max(u_1day_click_cnt) as u_1day_click_cnt,
     max(u_1day_share_cnt) as u_1day_share_cnt,
@@ -214,22 +221,32 @@ from candidate_user
     print('sql done')
     # data.to_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t')
     # data = pd.read_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t', dtype=str)
-    data.fillna('', inplace=True)
     model_key = 'ad_out_v2_model_v1.day'
     lr_model = LrModel('model/{}.json'.format(model_key))
     user_h_dict = {}
     k_col = 'u_id'
     dt = datetime
-    key_name = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_USER}{model_key}:{dt}"
-    print(key_name)
-    for index, row in tqdm(data.iterrows()):
-        k = row['u_id']
-        user_features = get_user_features(row)
-        user_h = lr_model.predict_h(user_features)
-        user_h_dict[k] = user_h
-        # print(item_features)
-        # print(item_h)
-    redis_helper.add_data_with_zset(key_name=key_name, data=user_h_dict, expire_time=2 * 24 * 3600)
-    with open('{}.json'.format(key_name), 'w') as fout:
+    key_name_prefix = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_USER}{model_key}"
+    print(key_name_prefix)
+    mean_user_h = 0.0
+    count_user_h = 0
+    with data.open_reader() as reader:
+        for row in tqdm(reader):
+            k = str(row['u_id'])
+            user_features = get_user_features(row)
+            user_h = lr_model.predict_h(user_features)
+            redis_helper.set_data_to_redis(f"{key_name_prefix}:{k}", user_h, 28 * 3600)
+            user_h_dict[k] = user_h
+            mean_user_h += user_h
+            count_user_h += 1
+            # print(user_features)
+            # print(user_h)
+    mean_user_h = mean_user_h / count_user_h 
+    user_h_dict['mean'] = mean_user_h 
+    print(mean_user_h)
+    print(count_user_h)
+    k = 'mean'
+    redis_helper.set_data_to_redis(f"{key_name_prefix}:{k}", mean_user_h, 28 * 3600)
+    with open('{}.json'.format(key_name_prefix), 'w') as fout:
         json.dump(user_h_dict, fout, indent=2, ensure_ascii=False, sort_keys=True)