liqian 3 лет назад
Родитель
Сommit
1a1d8e9d0c
5 измененных файлов с 129 добавлено и 8 удалено
  1. 6 0
      config.py
  2. 30 1
      rov_train.py
  3. 12 1
      top_video_list.py
  4. 35 0
      utils.py
  5. 46 6
      videos_filter.py

+ 6 - 0
config.py

@@ -14,6 +14,7 @@ class BaseConfig(object):
         'WAN_NENG_VIDEO': 17,  # 万能影视屋
         'LAO_HAO_KAN_VIDEO': 18,  # 老好看视频
         'ZUI_JING_QI': 19,  # 票圈最惊奇
+        'APP': 13,  # 票圈视频APP
     }
     # 数据存放路径
     DATA_DIR_PATH = './data'
@@ -47,6 +48,9 @@ class BaseConfig(object):
     # 小程序离线ROV模型结果存放 redis key前缀,完整格式:com.weiqu.video.recall.hot.item.score.{date}
     RECALL_KEY_NAME_PREFIX = 'com.weiqu.video.recall.hot.item.score.'
 
+    # app应用 小程序离线ROV模型结果存放 redis key前缀,完整格式:com.weiqu.video.recall.hot.item.score.app.{date}
+    RECALL_KEY_NAME_PREFIX_APP = 'com.weiqu.video.recall.hot.item.score.app.'
+
     # appType = 6, ROV召回池redis key前缀,完整格式:com.weiqu.video.recall.hot.apptype.h.item.score.{appType}.{h}
     RECALL_KEY_NAME_PREFIX_APP_TYPE = 'com.weiqu.video.recall.hot.apptype.h.item.score.'
 
@@ -62,9 +66,11 @@ class BaseConfig(object):
 
     # 修改ROV的视频 redis key
     UPDATE_ROV_KEY_NAME = 'com.weiqu.video.update.rov.item.score'
+    UPDATE_ROV_KEY_NAME_APP = 'com.weiqu.video.update.rov.item.score.app'
 
     # 生效中的置顶视频列表 redis key
     TOP_VIDEO_LIST_KEY_NAME = 'com.weiqu.video.top.item.score.area'
+    TOP_VIDEO_LIST_KEY_NAME_APP = 'com.weiqu.video.top.item.score.area.app'
 
     # rovScore公差
     ROV_SCORE_D = 0.001

+ 30 - 1
rov_train.py

@@ -10,7 +10,7 @@ from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percent
 
 from config import set_config
 from utils import read_from_pickle, write_to_pickle, data_normalization, \
-    request_post, filter_video_status, update_video_w_h_rate
+    request_post, filter_video_status, update_video_w_h_rate, filter_video_status_app
 from log import Log
 from db_helper import RedisHelper, MysqlHelper
 
@@ -261,6 +261,21 @@ def predict():
     #                           key_name=config_.W_H_RATE_UP_1_VIDEO_LIST_KEY_NAME['rov_recall'])
     #     log_.info('update video w_h_rate to redis finished!')
 
+    # ####### app应用数据更新
+    # 过滤
+    app_filtered_videos = filter_video_status_app(video_ids=video_id_list)
+    log_.info('app_filtered_videos count = {}'.format(len(app_filtered_videos)))
+    # 获取视频对应分数
+    app_redis_data = {}
+    for video_id in app_filtered_videos:
+        app_redis_data[video_id] = redis_data.get(video_id)
+    # 上传Redis
+    redis_helper = RedisHelper()
+    app_key_name = config_.RECALL_KEY_NAME_PREFIX_APP + time.strftime('%Y%m%d')
+    redis_helper.add_data_with_zset(key_name=app_key_name, data=app_redis_data)
+    log_.info('app test data to redis finished!')
+
+
 
 def predict_test():
     """测试环境数据生成"""
@@ -300,6 +315,20 @@ def predict_test():
     #                           key_name=config_.W_H_RATE_UP_1_VIDEO_LIST_KEY_NAME['rov_recall'])
     #     log_.info('update video w_h_rate to redis finished!')
 
+    # ####### app应用数据更新
+    # 过滤
+    app_filtered_videos = filter_video_status_app(filtered_videos)
+    log_.info('app_filtered_videos count = {}'.format(len(app_filtered_videos)))
+    # 获取视频对应分数
+    app_redis_data = {}
+    for video_id in app_filtered_videos:
+        app_redis_data[video_id] = redis_data.get(video_id)
+    # 上传Redis
+    redis_helper = RedisHelper()
+    app_key_name = config_.RECALL_KEY_NAME_PREFIX_APP + time.strftime('%Y%m%d')
+    redis_helper.add_data_with_zset(key_name=app_key_name, data=app_redis_data)
+    log_.info('app test data to redis finished!')
+
 
 if __name__ == '__main__':
     log_.info('rov model train start...')

+ 12 - 1
top_video_list.py

@@ -1,5 +1,5 @@
 import traceback
-from utils import request_post, filter_video_status
+from utils import request_post, filter_video_status, filter_video_status_app
 from db_helper import RedisHelper
 from config import set_config
 from log import Log
@@ -35,6 +35,17 @@ def get_top_video_list():
         redis_helper.set_data_to_redis(key_name=config_.TOP_VIDEO_LIST_KEY_NAME, value=str(value), expire_time=5 * 60)
         log_.info('置顶视频更新成功!')
 
+        # app推荐状态过滤
+        app_filter_videos = filter_video_status_app(video_ids=video_ids)
+        log_.info('app_filter_videos = {}'.format(app_filter_videos))
+        app_value = [item for item in data if item['videoId'] in app_filter_videos]
+        log_.info('app_value = {}'.format(app_value))
+        # 写入redis
+        redis_helper = RedisHelper()
+        redis_helper.set_data_to_redis(key_name=config_.TOP_VIDEO_LIST_KEY_NAME_APP, value=str(app_value),
+                                       expire_time=5 * 60)
+        log_.info('置顶视频app更新成功!')
+
     except Exception as e:
         log_.error('置顶视频更新失败!')
         log_.error(traceback.format_exc())

+ 35 - 0
utils.py

@@ -171,6 +171,41 @@ def filter_video_status(video_ids):
     return filtered_videos
 
 
+def filter_video_status_app(video_ids):
+    """
+    Filter video ids by their status - app variant.
+
+    Queries the table named by ``config_.VIDEO_STATUS`` and keeps only the
+    ids whose row satisfies every condition of the WHERE clause below
+    (audit, app recommendation, open, payment, encryption and transcoding
+    status).
+
+    :param video_ids: list of video ids, type-list
+    :return: filtered_videos, the ids returned by the query as a list of int;
+             an empty list when ``video_ids`` is empty
+    """
+    # An empty id list would be rendered as "IN ()", which is not valid SQL.
+    # Nothing can match in that case, so skip the query entirely.
+    if len(video_ids) == 0:
+        return []
+
+    # A one-element tuple formats as "(1,)"; the trailing comma is not valid
+    # SQL, so the single-id case is parenthesised by hand.
+    if len(video_ids) == 1:
+        id_clause = "({})".format(video_ids[0])
+    else:
+        id_clause = "{}".format(tuple(video_ids))
+
+    sql = "set hg_experimental_enable_shard_pruning=off; " \
+          "SELECT video_id " \
+          "FROM {} " \
+          "WHERE audit_status = 5 " \
+          "AND app_rec_status IN (1, -6, 10) " \
+          "AND open_status = 1 " \
+          "AND payment_status = 0 " \
+          "AND encryption_status != 5 " \
+          "AND transcoding_status = 3 " \
+          "AND video_id IN {};".format(config_.VIDEO_STATUS, id_clause)
+
+    hologres_helper = HologresHelper()
+    data = hologres_helper.get_data(sql=sql)
+    # The first column of each returned row is the video id.
+    filtered_videos = [int(temp[0]) for temp in data]
+    return filtered_videos
+
+
 def update_video_w_h_rate(video_ids, key_name):
     """
     获取横屏视频的宽高比,并存入redis中 (width/height>1)

+ 46 - 6
videos_filter.py

@@ -4,7 +4,7 @@ import traceback
 import ast
 from datetime import date, timedelta, datetime
 
-from utils import filter_video_status, send_msg_to_feishu
+from utils import filter_video_status, send_msg_to_feishu, filter_video_status_app
 from db_helper import RedisHelper
 from config import set_config
 from log import Log
@@ -126,7 +126,10 @@ def filter_rov_pool(app_type=None):
         return
     # 过滤
     video_ids = [int(video_id) for video_id in data]
-    filtered_result = filter_video_status(video_ids=video_ids)
+    if app_type == config_.APP_TYPE['APP']:
+        filtered_result = filter_video_status_app(video_ids=video_ids)
+    else:
+        filtered_result = filter_video_status(video_ids=video_ids)
     # 求差集,获取需要过滤掉的视频,并从redis中移除
     filter_videos = set(video_ids) - set(filtered_result)
     log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
@@ -169,7 +172,10 @@ def filter_flow_pool():
             log_.info("data size = {}, video_ids size = {}, data = {}".format(len(data), len(video_ids), data))
             log_.info("app_type {} videos filter end!".format(app_type))
             continue
-        filtered_result = filter_video_status(video_ids=video_ids)
+        if app_type == config_.APP_TYPE['APP']:
+            filtered_result = filter_video_status_app(video_ids=video_ids)
+        else:
+            filtered_result = filter_video_status(video_ids=video_ids)
         # 求差集,获取需要过滤掉的视频,并从redis中移除
         filter_videos = set(video_ids) - set(filtered_result)
         log_.info("data size = {}, video_ids size = {}, filtered size = {}, filter sizer = {}".format(
@@ -236,6 +242,31 @@ def filter_rov_updated():
     log_.info("update rov videos filter end!")
 
 
+def filter_rov_updated_app():
+    """Filter the ROV-updated videos by app recommendation status.
+
+    Reads the whole sorted set stored under
+    ``config_.UPDATE_ROV_KEY_NAME_APP``, asks ``filter_video_status_app``
+    which ids pass, and removes every other id from that sorted set.
+    """
+    log_.info("update rov videos app filter start ...")
+    redis_helper = RedisHelper()
+    key_name = config_.UPDATE_ROV_KEY_NAME_APP
+    # Index range 0..-1 requests every member of the zset.
+    data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
+    if data is None:
+        log_.info("data is None")
+        log_.info("update rov videos app filter end!")
+        return
+
+    video_ids = list(map(int, data))
+    filtered_result = filter_video_status_app(video_ids=video_ids)
+    # Ids missing from the filter result are the ones to drop from redis.
+    rejected = set(video_ids).difference(filtered_result)
+    log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(
+        len(video_ids), len(filtered_result), len(rejected)))
+    if rejected:
+        redis_helper.remove_value_from_zset(key_name=key_name, value=list(rejected))
+    log_.info("update rov videos app filter end!")
+
+
 def get_pool_redis_key(pool_type, app_type=None):
     """
     拼接key
@@ -261,15 +292,21 @@ def get_pool_redis_key(pool_type, app_type=None):
                 key_name = '{}{}.{}'.format(config_.RECALL_KEY_NAME_PREFIX_APP_TYPE, app_type, redis_date)
 
                 return key_name, redis_date
-        # 其他
         else:
+            # appType = 13  票圈视频app
+            if app_type == config_.APP_TYPE['APP']:
+                key_name_prefix = config_.RECALL_KEY_NAME_PREFIX_APP
+            # 其他
+            else:
+                key_name_prefix = config_.RECALL_KEY_NAME_PREFIX
+
             # 判断热度列表是否更新,未更新则使用前一天的热度列表
-            key_name = config_.RECALL_KEY_NAME_PREFIX + time.strftime('%Y%m%d')
+            key_name = key_name_prefix + time.strftime('%Y%m%d')
             if redis_helper.key_exists(key_name):
                 redis_date = date.today().strftime('%Y%m%d')
             else:
                 redis_date = (date.today() - timedelta(days=1)).strftime('%Y%m%d')
-                key_name = config_.RECALL_KEY_NAME_PREFIX + redis_date
+                key_name = key_name_prefix + redis_date
 
             return key_name, redis_date
 
@@ -288,12 +325,15 @@ def main():
         filter_rov_pool()
         # appType = 6,ROV召回池视频过滤
         filter_rov_pool(app_type=config_.APP_TYPE['SHORT_VIDEO'])
+        # appType = 13,票圈视频APP视频过滤
+        filter_rov_pool(app_type=config_.APP_TYPE['APP'])
         # 流量池视频过滤
         filter_flow_pool()
         # 兜底视频过滤
         filter_bottom()
         # 修改过ROV的视频过滤
         filter_rov_updated()
+        filter_rov_updated_app()
         # 运营强插相关推荐视频过滤
         filter_relevant_videos()
         # 按位置排序视频过滤