瀏覽代碼

update model_key

yangxiaohui 1 年之前
父節點
當前提交
308dbeb8fc
共有 3 個文件被更改,包括 56 次插入和 46 次删除
  1. 43 33
      feature.py
  2. 4 4
      get_ad_out_sample_v2_item.py
  3. 9 9
      get_ad_out_sample_v2_user.py

+ 43 - 33
feature.py

@@ -53,6 +53,45 @@ item_dense_conf = [
 'i_ros_3month',
 ]
 
+user_sparse_conf = [
+# 统计特征_用户
+'u_1day_exp_cnt',
+'u_1day_click_cnt',
+'u_1day_share_cnt',
+'u_1day_return_cnt',
+'u_3day_exp_cnt',
+'u_3day_click_cnt',
+'u_3day_share_cnt',
+'u_3day_return_cnt',
+'u_7day_exp_cnt',
+'u_7day_click_cnt',
+'u_7day_share_cnt',
+'u_7day_return_cnt',
+'u_3month_exp_cnt',
+'u_3month_click_cnt',
+'u_3month_share_cnt',
+'u_3month_return_cnt',
+]
+
+user_dense_conf = [
+'u_ctr_1day',
+'u_str_1day',
+'u_rov_1day',
+'u_ros_1day',
+'u_ctr_3day',
+'u_str_3day',
+'u_rov_3day',
+'u_ros_3day',
+'u_ctr_7day',
+'u_str_7day',
+'u_rov_7day',
+'u_ros_7day',
+'u_ctr_3month',
+'u_str_3month',
+'u_rov_3month',
+'u_ros_3month',
+]
+
 def format_x(x):
     return  str(x).replace(' ', '').replace(':', '_')
 def sparse_fea_2_feature(v, k):
@@ -79,8 +118,12 @@ def get_features(sparse_conf, dense_conf, row):
 
 def get_item_features(row):
     return get_features(item_sparse_conf, item_dense_conf, row)
+
+def get_user_features(row):
+    return get_features(user_sparse_conf, user_dense_conf, row)
      
 label_col = 'ui_is_out'
+
 sparse_fea_cols = [
 # 'u_id',
 'u_brand',
@@ -98,42 +141,9 @@ sparse_fea_cols = [
 #'playtime',
 #'ui_root_id',
 #'ui_share_id',
-# 统计特征_用户
-'u_1day_exp_cnt',
-'u_1day_click_cnt',
-'u_1day_share_cnt',
-'u_1day_return_cnt',
-'u_3day_exp_cnt',
-'u_3day_click_cnt',
-'u_3day_share_cnt',
-'u_3day_return_cnt',
-'u_7day_exp_cnt',
-'u_7day_click_cnt',
-'u_7day_share_cnt',
-'u_7day_return_cnt',
-'u_3month_exp_cnt',
-'u_3month_click_cnt',
-'u_3month_share_cnt',
-'u_3month_return_cnt',
 ]
 
 dense_fea_cols = [
-'u_ctr_1day',
-'u_str_1day',
-'u_rov_1day',
-'u_ros_1day',
-'u_ctr_3day',
-'u_str_3day',
-'u_rov_3day',
-'u_ros_3day',
-'u_ctr_7day',
-'u_str_7day',
-'u_rov_7day',
-'u_ros_7day',
-'u_ctr_3month',
-'u_str_3month',
-'u_rov_3month',
-'u_ros_3month',
 
 ]
 

+ 4 - 4
get_ad_out_sample_v2_item.py

@@ -209,17 +209,17 @@ SELECT
 *
 from candidate_item
     """.format(datetime=datetime)
-    print(sql)
+    # print(sql)
     data = exe_sql(project, sql)
     print('sql done')
     # data.to_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t')
     # data = pd.read_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t', dtype=str)
     data.fillna('', inplace=True)
-    lr_model = LrModel('model/ad_out_v2_model_v1.day.json')
+    model_key = 'ad_out_v2_model_v1.day'
+    lr_model = LrModel('model/{}.json'.format(model_key))
     item_h_dict = {}
     k_col = 'i_id'
     dt = datetime
-    model_key = 'test_lr_v1'
     key_name = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM}{model_key}:{dt}"
     print(key_name)
     for index, row in tqdm(data.iterrows()):
@@ -230,6 +230,6 @@ from candidate_item
         # print(item_features)
         # print(item_h)
     redis_helper.add_data_with_zset(key_name=key_name, data=item_h_dict, expire_time=2 * 24 * 3600)
-    with open('test_item.json', 'w') as fout:
+    with open('{}.json'.format(key_name), 'w') as fout:
         json.dump(item_h_dict, fout, indent=2, ensure_ascii=False, sort_keys=True)
 

+ 9 - 9
get_ad_out_sample_v2_user.py

@@ -12,7 +12,7 @@ config_, _ = set_config()
 log_ = Log()
 redis_helper = RedisHelper()
 
-from feature import get_item_features
+from feature import get_user_features
 from lr_model import LrModel
 from utils import exe_sql
 
@@ -209,27 +209,27 @@ SELECT
 *
 from candidate_user
     """.format(datetime=datetime)
-    print(sql)
+    # print(sql)
     data = exe_sql(project, sql)
     print('sql done')
     # data.to_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t')
     # data = pd.read_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t', dtype=str)
     data.fillna('', inplace=True)
-    lr_model = LrModel('model/ad_out_v2_model_v1.day.json')
+    model_key = 'ad_out_v2_model_v1.day'
+    lr_model = LrModel('model/{}.json'.format(model_key))
     item_h_dict = {}
     k_col = 'u_id'
     dt = datetime
-    model_key = 'test_lr_v1'
     key_name = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_USER}{model_key}:{dt}"
     print(key_name)
     for index, row in tqdm(data.iterrows()):
         k = row['u_id']
-        item_features = get_item_features(row)
-        item_h = lr_model.predict_h(item_features)
-        item_h_dict[k] = item_h
+        user_features = get_user_features(row)
+        user_h = lr_model.predict_h(user_features)
+        item_h_dict[k] = user_h
         # print(item_features)
         # print(item_h)
-    redis_helper.add_data_with_zset(key_name=key_name, data=item_h_dict, expire_time=2 * 24 * 3600)
-    with open('test_user.json', 'w') as fout:
+    redis_helper.add_data_with_zset(key_name=key_name, data=item_h_dict, expire_time=2 * 24 * 3600)
+    with open('{}.json'.format(key_name), 'w') as fout:
         json.dump(item_h_dict, fout, indent=2, ensure_ascii=False, sort_keys=True)