瀏覽代碼

Merge branch 'feature_20231204_yangxiaohui_add_lr_v1' of algorithm/rov-server into master

yangxiaohui 1 年之前
父節點
當前提交
a9534e310f
共有 4 個文件被更改,包括 179 次插入0 次删除
  1. 44 0
      ad_out_v1_online_w.json
  2. 67 0
      ad_recommend.py
  3. 30 0
      config.py
  4. 38 0
      lr_model.py

+ 44 - 0
ad_out_v1_online_w.json

@@ -0,0 +1,44 @@
+{
+  "bias": -0.147853,
+  "ctx_apptype#0": 0.180751,
+  "ctx_apptype#17": -0.245126,
+  "ctx_apptype#18": -0.0559548,
+  "ctx_apptype#19": -0.0976163,
+  "ctx_apptype#21": -0.246539,
+  "ctx_apptype#22": -0.230472,
+  "ctx_apptype#3": -0.304472,
+  "ctx_apptype#4": -0.00857673,
+  "ctx_apptype#5": -0.166914,
+  "ctx_apptype#6": -0.451902,
+  "ctx_hour#00": -1.33904,
+  "ctx_hour#01": -1.27071,
+  "ctx_hour#02": -1.28498,
+  "ctx_hour#03": -1.19206,
+  "ctx_hour#04": -1.43452,
+  "ctx_hour#05": -1.61711,
+  "ctx_hour#06": -1.71274,
+  "ctx_hour#07": -1.74905,
+  "ctx_hour#08": 0.0582405,
+  "ctx_hour#09": 0.118864,
+  "ctx_hour#10": 0.14203,
+  "ctx_hour#11": 0.169456,
+  "ctx_hour#12": 0.129989,
+  "ctx_hour#13": 0.102083,
+  "ctx_hour#14": 0.0894642,
+  "ctx_hour#15": 0.093456,
+  "ctx_hour#16": 0.0541803,
+  "ctx_hour#17": 0.0170209,
+  "ctx_hour#18": -0.119698,
+  "ctx_hour#19": -0.172689,
+  "ctx_hour#20": -0.16682,
+  "ctx_hour#21": -0.00945801,
+  "ctx_hour#22": -0.025524,
+  "ctx_hour#23": -0.0166787,
+  "ctx_week#1": -0.105368,
+  "ctx_week#2": -0.100855,
+  "ctx_week#3": -0.0844838,
+  "ctx_week#4": -0.127137,
+  "ctx_week#5": -0.123138,
+  "ctx_week#6": -0.112104,
+  "ctx_week#7": -0.0863324
+}

+ 67 - 0
ad_recommend.py

@@ -1,9 +1,11 @@
 import json
+import time
 import traceback
 import datetime
 from utils import RedisHelper
 from config import set_config
 from log import Log
+from lr_model import get_final_score
 log_ = Log()
 config_ = set_config()
 redis_helper = RedisHelper()
@@ -294,6 +296,59 @@ def predict_mid_video_res_with_add(now_date, mid, video_id, abtest_param, abtest
             'ad_predict': ad_predict}
     return result
 
def predict_mid_video_res_with_model(now_date, mid, video_id, abtest_param, abtest_id, abtest_config_tag, ab_test_code, care_model_status, app_type):
    """Decide whether to show an ad by scoring the request with the LR model.

    Offline user/item scores and the decision threshold are read from redis,
    request-time context features are scored by ``get_final_score``, and the
    resulting probability is compared against the threshold. If any of the
    redis values is missing, the baseline ``predict_mid_video_res`` result is
    returned instead.

    :param now_date: only forwarded to the baseline predictor
    :param mid: user id, used in the offline user-score redis key
    :param video_id: video id, used in the offline item-score redis key
    :param abtest_param: experiment config dict; ``model_key`` selects the
        model (defaults to ``'ad_out_v1'``)
    :param abtest_id: experiment id, part of the threshold redis key
    :param abtest_config_tag: experiment config tag, part of the threshold key
    :param ab_test_code: only forwarded to the baseline predictor
    :param care_model_status: only forwarded to the baseline predictor
    :param app_type: app type; stringified into the ``ctx_apptype`` feature
    :return: on the model path, a dict with ``user_score``, ``item_score``,
        ``final_score``, ``online_score``, ``threshold``, ``ad_predict``
        (2 = show ad, 1 = do not show ad) and ``online_features``; on the
        fallback path, whatever ``predict_mid_video_res`` returns
    """

    def baseline_result():
        # Single place for the baseline fallback so every "missing data"
        # branch behaves identically.
        return predict_mid_video_res(
            now_date=now_date,
            mid=mid,
            video_id=video_id,
            abtest_param=abtest_param,
            abtest_id=abtest_id,
            abtest_config_tag=abtest_config_tag,
            ab_test_code=ab_test_code,
            care_model_status=care_model_status,
            app_type=app_type
        )

    model_key = abtest_param.get('model_key', 'ad_out_v1')
    user_key_name = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_USER}{model_key}:{mid}"
    item_key_name = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM}{model_key}:{video_id}"
    config_key_prefix = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_CONFIG}{model_key}:{abtest_id}:{abtest_config_tag}"
    threshold_key = f"{config_key_prefix}:threshold"

    user_score = redis_helper.get_data_from_redis(key_name=user_key_name)
    item_score = redis_helper.get_data_from_redis(key_name=item_key_name)

    # If either offline score is missing, use the baseline logic.
    if user_score is None or item_score is None:
        return baseline_result()

    # A missing threshold used to crash with float(None) -> TypeError; treat
    # it like a missing score and fall back to the baseline instead.
    raw_threshold = redis_helper.get_data_from_redis(key_name=threshold_key)
    if raw_threshold is None:
        return baseline_result()
    threshold = float(raw_threshold)

    offline_score = float(user_score) + float(item_score)

    # Take one clock snapshot so hour and weekday cannot straddle a boundary.
    now = time.localtime()
    online_features = {
        'ctx_apptype': str(app_type),
        # The weight file is keyed ctx_week#1..#7, while '%w' yields '0'..'6'
        # (Sunday = '0'), so Sunday never matched a weight. tm_wday is
        # 0 = Monday, hence +1 gives 1 = Monday .. 7 = Sunday.
        # NOTE(review): assumes the training data encodes Sunday as 7 - confirm.
        'ctx_week': str(now.tm_wday + 1),
        'ctx_hour': time.strftime('%H', now),
    }

    final_score, online_score = get_final_score(online_features, offline_score)

    # Bounce-rate threshold check: below the threshold -> show the ad (2),
    # otherwise -> do not show the ad (1).
    ad_predict = 2 if final_score < threshold else 1

    result = {
        'user_score': user_score,
        'item_score': item_score,
        'final_score': final_score,
        'online_score': online_score,
        'threshold': threshold,
        'ad_predict': ad_predict,
        'online_features': online_features,
    }
    return result
 
 def predict_mid_video_res_with_multiply(now_date, mid, video_id, abtest_param, abtest_id, abtest_config_tag, ab_test_code, care_model_status):
     now_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
@@ -449,6 +504,18 @@ def ad_recommend_predict(app_type, mid, video_id, ab_exp_info, ab_test_code, car
                 ab_test_code=ab_test_code,
                 care_model_status=care_model_status
             )
+        elif threshold_mix_func == 'model':
+            result = predict_mid_video_res_with_model(
+                now_date=now_date,
+                mid=mid,
+                video_id=video_id,
+                abtest_param=abtest_param,
+                abtest_id=abtest_id,
+                abtest_config_tag=abtest_config_tag,
+                ab_test_code=ab_test_code,
+                care_model_status=care_model_status,
+                app_type=app_type
+            )
         else:
             result = predict_mid_video_res(
                 now_date=now_date,

+ 30 - 0
config.py

@@ -1171,6 +1171,30 @@ class BaseConfig(object):
             }
         },  # 所有广告类型数据 + 优化阈值计算方式 + else未开启关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据 + return25_nmids用户不出广告 + 所有用户组top3不出广告
 
+        '173-u': {
+            'threshold_mix_func': 'model',
+            'model_key': 'ad_out_v1',
+            'video': {'data': 'videos0out'},
+            'user': {'data': 'user0out', 'rule': 'rule2'},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+        },  # 是否出广告模型预测
+
+        '173-v': {
+            'threshold_mix_func': 'model',
+            'model_key': 'ad_out_v1',
+            'video': {'data': 'videos0out'},
+            'user': {'data': 'user0out', 'rule': 'rule2'},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+        },  # 是否出广告模型预测
+
+
+
         # 票圈视频+
         # '190-a': {
         #     'video': {'data': 'data1'},
@@ -1886,6 +1910,12 @@ class BaseConfig(object):
 
     }
 
+    # 有广告时的用户跳出模型离线分数 redis key 前缀,完整格式:ad:out:model:score:user:{model_key}:{mid}
+    KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_USER = 'ad:out:model:score:user:'
+    # Redis key prefix for the offline item (video) score of the ad bounce model; full format: ad:out:model:score:item:{model_key}:{videoid}
+    KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM = 'ad:out:model:score:item:'
+    # 有广告时的视频跳出模型策略配置  redis key 前缀,完整格式:ad:out:model:config:{model_key}:{abtest_id}:{abtest_config_tag}:{config}
+    KEY_NAME_PREFIX_AD_OUT_MODEL_CONFIG = 'ad:out:model:config:'
     # 用户组有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:users:group:predict:share:rate:{user_data_key}:{date}
     KEY_NAME_PREFIX_AD_GROUP = 'ad:users:group:predict:share:rate:'
     # 视频有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:video:predict:share:rate:{video_data_key}:{date}

+ 38 - 0
lr_model.py

@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
import json
import math
import os
import time
+
def load_json(filename):
    """Read *filename* and return its parsed JSON content.

    The file is opened explicitly as UTF-8 so the result does not depend on
    the locale's default encoding.

    :param filename: path of the JSON file to read
    :return: the deserialized JSON value
    :raises OSError: if the file cannot be opened
    :raises ValueError: if the content is not valid JSON
    """
    with open(filename, 'r', encoding='utf-8') as fin:
        return json.load(fin)
+
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) of *x*.

    Uses a numerically stable form: for negative *x* the naive expression
    evaluates ``math.exp(-x)``, which raises OverflowError once ``-x``
    exceeds roughly 709. Branching on the sign keeps the exponent
    non-positive in both cases.

    :param x: the logit
    :return: a float in [0.0, 1.0]
    """
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1.0 + exp_x)
+
# Online-feature weights of the LR model, loaded once at import time.
# The path is resolved relative to this module rather than the process
# working directory, so importing works regardless of where the service
# is started from.
online_w = load_json(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), 'ad_out_v1_online_w.json')
)
+
def get_online_score(online_features, weights=None):
    """Return the linear score contributed by the online features.

    The score is the ``bias`` weight plus, for every ``name -> value`` pair
    in *online_features*, the weight stored under the key ``"name#value"``.
    Features without a matching weight contribute 0.0.

    :param online_features: mapping of feature name to feature value; values
        are formatted with ``str()`` semantics, so non-string values (e.g. an
        int app type) no longer raise TypeError
    :param weights: optional weight mapping; defaults to the module-level
        ``online_w`` when omitted
    :return: the summed score as a float
    """
    if weights is None:
        weights = online_w
    score = weights.get('bias', 0.0)
    score += sum(
        weights.get(f'{name}#{value}', 0.0)
        for name, value in online_features.items()
    )
    return score
+
def get_final_score(online_features, offline_score):
    """Combine the online and offline scores into a final probability.

    :param online_features: mapping of feature name to value, scored by
        ``get_online_score``
    :param offline_score: precomputed offline logit to add to the online one
    :return: tuple ``(final_score, online_score)`` where ``final_score`` is
        ``sigmoid(online_score + offline_score)``
    """
    online_part = get_online_score(online_features)
    logit = online_part + offline_score
    return sigmoid(logit), online_part
+
if __name__ == '__main__':
    # Manual smoke test: score the current time context for app type 0 with
    # a few offline scores, then score an empty feature set.
    app_type = 0
    # One clock snapshot so hour and weekday are taken at the same instant.
    now = time.localtime()
    online_features = {
        'ctx_apptype': str(app_type),
        # The weight file is keyed ctx_week#1..#7, while '%w' yields '0'..'6'
        # (Sunday = '0'), so Sunday never matched a weight. tm_wday is
        # 0 = Monday, hence +1 gives 1 = Monday .. 7 = Sunday.
        # NOTE(review): assumes the training data encodes Sunday as 7 - confirm.
        'ctx_week': str(now.tm_wday + 1),
        'ctx_hour': time.strftime('%H', now),
    }
    print(get_final_score(online_features, -1.0))
    print(get_final_score(online_features, 0.0))
    print(get_final_score({}, 0.0))
+
+