Browse Source

add ad-recommend

liqian 2 years ago
parent
commit
1efee32109
2 changed files with 87 additions and 0 deletions
  1. 71 0
      ad_users_data_update.py
  2. 16 0
      config.py

+ 71 - 0
ad_users_data_update.py

@@ -0,0 +1,71 @@
+import datetime
+import pandas as pd
+from utils import get_data_from_odps
+from config import set_config
+from log import Log
+# Load the configuration object; the second value returned by set_config() is not used here.
+config_, _ = set_config()
+# Module-level Log instance (presumably the project's logger -- confirm against log.py).
+log_ = Log()
+
+
+def get_feature_data(project, table, features, now_date):
+    """Fetch the records of an ODPS table for one date and keep the selected fields.
+
+    Args:
+        project: Project name forwarded to ``get_data_from_odps``.
+        table: Table name forwarded to ``get_data_from_odps``.
+        features: Names of the record fields to extract. They become the
+            DataFrame columns, in the order given.
+        now_date: ``date``/``datetime`` object; its ``%Y%m%d`` form is passed
+            to ``get_data_from_odps`` as ``date``.
+
+    Returns:
+        pandas.DataFrame with one row per record and one column per feature.
+        When no records come back the frame is empty but still carries the
+        feature columns, so callers can index columns without a KeyError.
+    """
+    dt = now_date.strftime('%Y%m%d')
+    records = get_data_from_odps(date=dt, project=project, table=table)
+    # De-duplicate while preserving order; a dict per record would collapse
+    # repeated names anyway, and pandas needs unique names for `columns=`.
+    columns = list(dict.fromkeys(features))
+    rows = [{name: record[name] for name in columns} for record in records]
+    # Passing `columns` explicitly keeps the schema even when `rows` is empty.
+    return pd.DataFrame(rows, columns=columns)
+
+
+def _estimate_ad_share_rate(data_key, id_column, aggregate_id, result_column, now_date):
+    """Shared implementation of the with-ads share-rate estimate.
+
+    Reads the table configured under ``config_.ad_model_data[data_key]`` and
+    computes, for every row except the aggregate one::
+
+        result_column = sharerate_ad * aggregate_sharerate_ad / sharerate_all
+
+    Args:
+        data_key: Key into ``config_.ad_model_data`` selecting project/table.
+        id_column: Column identifying each row ('group' or 'videoid').
+        aggregate_id: Value of ``id_column`` that marks the aggregate row
+            (e.g. 'allmids'); that row supplies the overall with-ads rate.
+        result_column: Name of the column that receives the estimate.
+        now_date: Date whose data is fetched (see ``get_feature_data``).
+
+    Returns:
+        A new DataFrame without the aggregate row and with ``result_column``
+        added. Rows whose ``sharerate_all`` is 0 yield inf/NaN (pandas float
+        division), exactly as before.
+
+    Raises:
+        ValueError: If the data does not contain exactly one aggregate row.
+    """
+    source = config_.ad_model_data[data_key]
+    features = ['apptype', id_column, 'sharerate_all', 'sharerate_ad']
+    df = get_feature_data(
+        project=source.get('project'),
+        table=source.get('table'),
+        features=features,
+        now_date=now_date,
+    )
+    df['sharerate_all'] = df['sharerate_all'].astype(float)
+    df['sharerate_ad'] = df['sharerate_ad'].astype(float)
+
+    is_aggregate = df[id_column] == aggregate_id
+    aggregate_rates = df.loc[is_aggregate, 'sharerate_ad']
+    # float(Series) only works for a one-element Series and is deprecated in
+    # recent pandas; check the row count explicitly and fail with a clear message.
+    if len(aggregate_rates) != 1:
+        raise ValueError(
+            f"expected exactly one row with {id_column} == {aggregate_id!r} "
+            f"in '{data_key}' data, found {len(aggregate_rates)}"
+        )
+    aggregate_rate = float(aggregate_rates.iloc[0])
+
+    # Copy the filtered rows so adding a column does not write into a slice
+    # of `df` (avoids pandas' SettingWithCopyWarning).
+    result = df.loc[~is_aggregate].copy()
+    result[result_column] = result['sharerate_ad'] * aggregate_rate / result['sharerate_all']
+    return result
+
+
+def predict_user_group_share_rate(now_date):
+    """Estimate the share rate of each user group when ads are shown.
+
+    The 'allmids' row supplies the with-ads share rate over all user groups
+    (described as a 30-day figure in the original comment); it is removed
+    from the result.
+
+    Args:
+        now_date: Date whose data is fetched.
+
+    Returns:
+        DataFrame with columns 'apptype', 'group', 'sharerate_all',
+        'sharerate_ad' and the computed 'group_ad_share_rate'.
+
+    Raises:
+        ValueError: If the data does not contain exactly one 'allmids' row.
+    """
+    return _estimate_ad_share_rate(
+        data_key='users_share_rate',
+        id_column='group',
+        aggregate_id='allmids',
+        result_column='group_ad_share_rate',
+        now_date=now_date,
+    )
+
+
+def predict_video_share_rate(now_date):
+    """Estimate the share rate of each video when ads are shown.
+
+    The 'allvideos' row supplies the with-ads share rate over all videos
+    (described as a 30-day figure in the original comment); it is removed
+    from the result.
+
+    Args:
+        now_date: Date whose data is fetched.
+
+    Returns:
+        DataFrame with columns 'apptype', 'videoid', 'sharerate_all',
+        'sharerate_ad' and the computed 'video_ad_share_rate'.
+
+    Raises:
+        ValueError: If the data does not contain exactly one 'allvideos' row.
+    """
+    return _estimate_ad_share_rate(
+        data_key='videos_share_rate',
+        id_column='videoid',
+        aggregate_id='allvideos',
+        result_column='video_ad_share_rate',
+        now_date=now_date,
+    )
+
+

+ 16 - 0
config.py

@@ -581,6 +581,22 @@ class BaseConfig(object):
     # 宗教视频列表更新结果存放 redis key 前缀,完整格式:'religion:videos:item:{date}'
     KEY_NAME_PREFIX_RELIGION_VIDEOS = 'religion:videos:item:'
 
+    # Ad model data sources.
+    # Each entry maps a dataset name to the 'project' and 'table' that hold it;
+    # ad_users_data_update.py passes these to get_data_from_odps.
+    ad_model_data = {
+        'user_group': {
+            'project': 'loghubods',
+            'table': 'user_share_return_admodel'
+        },
+        'users_share_rate': {
+            'project': 'loghubods',
+            'table': 'usergroup_sharerate_admodel'
+        },
+        'videos_share_rate': {
+            'project': 'loghubods',
+            'table': 'video_sharerate_admodel'
+        },
+    }
+
 
 class DevelopmentConfig(BaseConfig):
     """开发环境配置"""