Browse Source

add user, item to redis

yangxiaohui 1 year ago
parent
commit
17dce5633b
3 changed files with 240 additions and 1 deletions
  1. 4 0
      config.py
  2. 1 1
      get_ad_out_sample_v2_item.py
  3. 235 0
      get_ad_out_sample_v2_user.py

+ 4 - 0
config.py

@@ -2358,6 +2358,10 @@ class BaseConfig(object):
         },
     }
 
+    # 有广告时的用户跳出分数预测结果存放 redis key 前缀,完整格式:ad:out:model:score:user:{model_key}:{date}
+    KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_USER = 'ad:out:model:score:user:'
+    # 有广告时的视频跳出分数预测结果存放 redis key 前缀,完整格式:ad:out:model:score:video:{model_key}:{date}
+    KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM = 'ad:out:model:score:item:'
     # 用户组有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:users:group:predict:share:rate:{user_data_key}:{user_rule_key}:{date}
     KEY_NAME_PREFIX_AD_GROUP = 'ad:users:group:predict:share:rate:'
     # 视频有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:video:predict:share:rate:{video_data_key}:{date}

+ 1 - 1
get_ad_out_sample_v2_item.py

@@ -227,7 +227,7 @@ from candidate_item
         # print(item_h)
     dt = datetime
     model_key = 'test_lr_v1'
-    key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{model_key}:{dt}"
+    key_name = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM}{model_key}:{dt}"
     print(key_name)
     redis_helper.add_data_with_zset(key_name=key_name, data=item_h_dict, expire_time=2 * 24 * 3600)
     with open('test_item.json', 'w') as fout:

+ 235 - 0
get_ad_out_sample_v2_user.py

@@ -0,0 +1,235 @@
+#coding utf-8
+from tqdm import tqdm
+import sys
+import json
+
+import traceback
+from threading import Timer
+from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from config import set_config
+from log import Log
+config_, _ = set_config()
+log_ = Log()
+redis_helper = RedisHelper()
+
+from feature import get_item_features
+from lr_model import LrModel
+from utils import exe_sql
+
+if __name__ == "__main__":
+    project = 'loghubods'
+    datetime = sys.argv[1]
+    sql = """
+--odps sql 
+--********************************************************************--
+--author:研发
+--create time:2023-12-01 15:48:17
+--********************************************************************--
+with candidate as (
+select
+-- 基础特征_用户
+mid AS u_id
+,machineinfo_brand AS u_brand
+,machineinfo_model AS u_device
+,SPLIT(machineinfo_system,' ')[0] AS u_system
+,machineinfo_system AS u_system_ver
+-- 基础特征_视频
+,videoid AS i_id
+,i_up_id AS i_up_id
+,tags as i_tag
+,title as i_title
+,ceil(log2(i_title_len + 1)) as i_title_len
+,ceil(log2(total_time + 1)) as i_play_len
+,ceil(log2(i_days_since_upload + 1)) as i_days_since_upload -- 发布时间(距离现在天数)
+-- 基础特征_场景
+,ctx_day AS ctx_day
+,ctx_week AS ctx_week
+,ctx_hour AS ctx_hour
+,ctx_region as ctx_region
+,ctx_city as ctx_city
+-- 基础特征_交叉
+,ui_is_out as ui_is_out
+,i_play_len as playtime
+-- ,IF(i_play_len > 1,'0','1') AS ui_is_out_new
+,rootmid AS ui_root_id
+,shareid AS ui_share_id
+-- 统计特征_用户
+,ceil(log2(u_1day_exp_cnt + 1)) as u_1day_exp_cnt
+,ceil(log2(u_1day_click_cnt + 1)) as u_1day_click_cnt
+,ceil(log2(u_1day_share_cnt + 1)) as u_1day_share_cnt
+,ceil(log2(u_1day_return_cnt + 1)) as u_1day_return_cnt
+,ceil(log2(u_3day_exp_cnt + 1)) as u_3day_exp_cnt
+,ceil(log2(u_3day_click_cnt + 1)) as u_3day_click_cnt
+,ceil(log2(u_3day_share_cnt + 1)) as u_3day_share_cnt
+,ceil(log2(u_3day_return_cnt + 1)) as u_3day_return_cnt
+,ceil(log2(u_7day_exp_cnt + 1)) as u_7day_exp_cnt
+,ceil(log2(u_7day_click_cnt + 1)) as u_7day_click_cnt
+,ceil(log2(u_7day_share_cnt + 1)) as u_7day_share_cnt
+,ceil(log2(u_7day_return_cnt + 1)) as u_7day_return_cnt
+,ceil(log2(u_3month_exp_cnt + 1)) as u_3month_exp_cnt
+,ceil(log2(u_3month_click_cnt + 1)) as u_3month_click_cnt
+,ceil(log2(u_3month_share_cnt + 1)) as u_3month_share_cnt
+,ceil(log2(u_3month_return_cnt + 1)) as u_3month_return_cnt
+,round(if(u_ctr_1day > 10.0, 10.0, u_ctr_1day) / 10.0, 6) as u_ctr_1day
+,round(if(u_str_1day > 10.0, 10.0, u_str_1day) / 10.0, 6) as u_str_1day
+,round(if(u_rov_1day > 10.0, 10.0, u_rov_1day) / 10.0, 6) as u_rov_1day
+,round(if(u_ros_1day > 10.0, 10.0, u_ros_1day) / 10.0, 6) as u_ros_1day
+,round(if(u_ctr_3day > 10.0, 10.0, u_ctr_3day) / 10.0, 6) as u_ctr_3day
+,round(if(u_str_3day > 10.0, 10.0, u_str_3day) / 10.0, 6) as u_str_3day
+,round(if(u_rov_3day > 10.0, 10.0, u_rov_3day) / 10.0, 6) as u_rov_3day
+,round(if(u_ros_3day > 10.0, 10.0, u_ros_3day) / 10.0, 6) as u_ros_3day
+,round(if(u_ctr_7day > 10.0, 10.0, u_ctr_7day) / 10.0, 6) as u_ctr_7day
+,round(if(u_str_7day > 10.0, 10.0, u_str_7day) / 10.0, 6) as u_str_7day
+,round(if(u_rov_7day > 10.0, 10.0, u_rov_7day) / 10.0, 6) as u_rov_7day
+,round(if(u_ros_7day > 10.0, 10.0, u_ros_7day) / 10.0, 6) as u_ros_7day
+,round(if(u_ctr_3month > 10.0, 10.0, u_ctr_3month) / 10.0, 6) as u_ctr_3month
+,round(if(u_str_3month > 10.0, 10.0, u_str_3month) / 10.0, 6) as u_str_3month
+,round(if(u_rov_3month > 10.0, 10.0, u_rov_3month) / 10.0, 6) as u_rov_3month
+,round(if(u_ros_3month > 10.0, 10.0, u_ros_3month) / 10.0, 6) as u_ros_3month
+-- 统计特征_视频
+,ceil(log2(i_1day_exp_cnt + 1)) as i_1day_exp_cnt
+,ceil(log2(i_1day_click_cnt + 1)) as i_1day_click_cnt
+,ceil(log2(i_1day_share_cnt + 1)) as i_1day_share_cnt
+,ceil(log2(i_1day_return_cnt + 1)) as i_1day_return_cnt
+,ceil(log2(i_3day_exp_cnt + 1)) as i_3day_exp_cnt
+,ceil(log2(i_3day_click_cnt + 1)) as i_3day_click_cnt
+,ceil(log2(i_3day_share_cnt + 1)) as i_3day_share_cnt
+,ceil(log2(i_3day_return_cnt + 1)) as i_3day_return_cnt
+,ceil(log2(i_7day_exp_cnt + 1)) as i_7day_exp_cnt
+,ceil(log2(i_7day_click_cnt + 1)) as i_7day_click_cnt
+,ceil(log2(i_7day_share_cnt + 1)) as i_7day_share_cnt
+,ceil(log2(i_7day_return_cnt + 1)) as i_7day_return_cnt
+,ceil(log2(i_3month_exp_cnt + 1)) as i_3month_exp_cnt
+,ceil(log2(i_3month_click_cnt + 1)) as i_3month_click_cnt
+,ceil(log2(i_3month_share_cnt + 1)) as i_3month_share_cnt
+,ceil(log2(i_3month_return_cnt + 1)) as i_3month_return_cnt
+,round(if(i_ctr_1day > 10.0, 10.0, i_ctr_1day) / 10.0, 6) as i_ctr_1day
+,round(if(i_str_1day > 10.0, 10.0, i_str_1day) / 10.0, 6) as i_str_1day
+,round(if(i_rov_1day > 10.0, 10.0, i_rov_1day) / 10.0, 6) as i_rov_1day
+,round(if(i_ros_1day > 10.0, 10.0, i_ros_1day) / 10.0, 6) as i_ros_1day
+,round(if(i_ctr_3day > 10.0, 10.0, i_ctr_3day) / 10.0, 6) as i_ctr_3day
+,round(if(i_str_3day > 10.0, 10.0, i_str_3day) / 10.0, 6) as i_str_3day
+,round(if(i_rov_3day > 10.0, 10.0, i_rov_3day) / 10.0, 6) as i_rov_3day
+,round(if(i_ros_3day > 10.0, 10.0, i_ros_3day) / 10.0, 6) as i_ros_3day
+,round(if(i_ctr_7day > 10.0, 10.0, i_ctr_7day) / 10.0, 6) as i_ctr_7day
+,round(if(i_str_7day > 10.0, 10.0, i_str_7day) / 10.0, 6) as i_str_7day
+,round(if(i_rov_7day > 10.0, 10.0, i_rov_7day) / 10.0, 6) as i_rov_7day
+,round(if(i_ros_7day > 10.0, 10.0, i_ros_7day) / 10.0, 6) as i_ros_7day
+,round(if(i_ctr_3month > 10.0, 10.0, i_ctr_3month) / 10.0, 6) as i_ctr_3month
+,round(if(i_str_3month > 10.0, 10.0, i_str_3month) / 10.0, 6) as i_str_3month
+,round(if(i_rov_3month > 10.0, 10.0, i_rov_3month) / 10.0, 6) as i_rov_3month
+,round(if(i_ros_3month > 10.0, 10.0, i_ros_3month) / 10.0, 6) as i_ros_3month
+from
+user_video_features_data_final
+where dt='{datetime}'
+and ad_ornot = '0'
+and apptype != '13'
+), candidate_user as (
+    SELECT 
+    u_id,
+    max(u_1day_exp_cnt) as u_1day_exp_cnt,
+    max(u_1day_click_cnt) as u_1day_click_cnt,
+    max(u_1day_share_cnt) as u_1day_share_cnt,
+    max(u_1day_return_cnt) as u_1day_return_cnt,
+    max(u_3day_exp_cnt) as u_3day_exp_cnt,
+    max(u_3day_click_cnt) as u_3day_click_cnt,
+    max(u_3day_share_cnt) as u_3day_share_cnt,
+    max(u_3day_return_cnt) as u_3day_return_cnt,
+    max(u_7day_exp_cnt) as u_7day_exp_cnt,
+    max(u_7day_click_cnt) as u_7day_click_cnt,
+    max(u_7day_share_cnt) as u_7day_share_cnt,
+    max(u_7day_return_cnt) as u_7day_return_cnt,
+    max(u_3month_exp_cnt) as u_3month_exp_cnt,
+    max(u_3month_click_cnt) as u_3month_click_cnt,
+    max(u_3month_share_cnt) as u_3month_share_cnt,
+    max(u_3month_return_cnt) as u_3month_return_cnt,
+    max(u_ctr_1day) as u_ctr_1day,
+    max(u_str_1day) as u_str_1day,
+    max(u_rov_1day) as u_rov_1day,
+    max(u_ros_1day) as u_ros_1day,
+    max(u_ctr_3day) as u_ctr_3day,
+    max(u_str_3day) as u_str_3day,
+    max(u_rov_3day) as u_rov_3day,
+    max(u_ros_3day) as u_ros_3day,
+    max(u_ctr_7day) as u_ctr_7day,
+    max(u_str_7day) as u_str_7day,
+    max(u_rov_7day) as u_rov_7day,
+    max(u_ros_7day) as u_ros_7day,
+    max(u_ctr_3month) as u_ctr_3month,
+    max(u_str_3month) as u_str_3month,
+    max(u_rov_3month) as u_rov_3month,
+    max(u_ros_3month) as u_ros_3month
+    FROM 
+    candidate
+    group by u_id
+), candidate_item as (
+    select
+    i_id,
+    max(i_up_id) as i_up_id,
+    max(i_title_len) as i_title_len,
+    max(i_play_len) as i_play_len,
+    max(i_days_since_upload) as i_days_since_upload,
+    max(i_1day_exp_cnt) as i_1day_exp_cnt,
+    max(i_1day_click_cnt) as i_1day_click_cnt,
+    max(i_1day_share_cnt) as i_1day_share_cnt,
+    max(i_1day_return_cnt) as i_1day_return_cnt,
+    max(i_3day_exp_cnt) as i_3day_exp_cnt,
+    max(i_3day_click_cnt) as i_3day_click_cnt,
+    max(i_3day_share_cnt) as i_3day_share_cnt,
+    max(i_3day_return_cnt) as i_3day_return_cnt,
+    max(i_7day_exp_cnt) as i_7day_exp_cnt,
+    max(i_7day_click_cnt) as i_7day_click_cnt,
+    max(i_7day_share_cnt) as i_7day_share_cnt,
+    max(i_7day_return_cnt) as i_7day_return_cnt,
+    max(i_3month_exp_cnt) as i_3month_exp_cnt,
+    max(i_3month_click_cnt) as i_3month_click_cnt,
+    max(i_3month_share_cnt) as i_3month_share_cnt,
+    max(i_3month_return_cnt) as i_3month_return_cnt,
+    max(i_ctr_1day) as i_ctr_1day,
+    max(i_str_1day) as i_str_1day,
+    max(i_rov_1day) as i_rov_1day,
+    max(i_ros_1day) as i_ros_1day,
+    max(i_ctr_3day) as i_ctr_3day,
+    max(i_str_3day) as i_str_3day,
+    max(i_rov_3day) as i_rov_3day,
+    max(i_ros_3day) as i_ros_3day,
+    max(i_ctr_7day) as i_ctr_7day,
+    max(i_str_7day) as i_str_7day,
+    max(i_rov_7day) as i_rov_7day,
+    max(i_ros_7day) as i_ros_7day,
+    max(i_ctr_3month) as i_ctr_3month,
+    max(i_str_3month) as i_str_3month,
+    max(i_rov_3month) as i_rov_3month,
+    max(i_ros_3month) as i_ros_3month
+    FROM 
+    candidate
+    group by i_id
+)
+SELECT
+*
+from candidate_user
+    """.format(datetime=datetime)
+    print(sql)
+    data = exe_sql(project, sql)
+    print('sql done')
+    # data.to_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t')
+    # data = pd.read_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t', dtype=str)
+    data.fillna('', inplace=True)
+    lr_model = LrModel('model/ad_out_v2_model_v1.day.json')
+    item_h_dict = {}
+    k_col = 'u_id'
+    for index, row in tqdm(data.iterrows()):
+        k = row['u_id']
+        item_features = get_item_features(row)
+        item_h = lr_model.predict_h(item_features)
+        item_h_dict[k] = item_h
+        # print(item_features)
+        # print(item_h)
+    dt = datetime
+    model_key = 'test_lr_v1'
+    key_name = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_USER}{model_key}:{dt}"
+    print(key_name)
+    redis_helper.add_data_with_zset(key_name=key_name, data=item_h_dict, expire_time=2 * 24 * 3600)
+    with open('test_user.json', 'w') as fout:
+        json.dump(item_h_dict, fout, indent=2, ensure_ascii=False, sort_keys=True)
+