ソースを参照

add filter_shield_video

liqian 2 年前
コミット
7320bb1522
2 ファイル変更、47 行追加、18 行削除
  1. 46 17
      rov_train.py
  2. 1 1
      utils.py

+ 46 - 17
rov_train.py

@@ -10,7 +10,7 @@ from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percent
 
 from config import set_config
 from utils import read_from_pickle, write_to_pickle, data_normalization, \
-    request_post, filter_video_status, update_video_w_h_rate, filter_video_status_app
+    request_post, filter_video_status, update_video_w_h_rate, filter_video_status_app, filter_shield_video
 from log import Log
 from db_helper import RedisHelper, MysqlHelper
 
@@ -237,11 +237,24 @@ def predict():
                             sort_columns=['rov_score'],
                             ascending=False)
 
+    # 过滤
+    applet_status_filtered_videos = filter_video_status(video_ids=video_id_list)
+    log_.info('applet_status_filtered_videos count = {}'.format(len(applet_status_filtered_videos)))
+    # 屏蔽视频过滤
+    applet_filtered_videos = filter_shield_video(video_ids=applet_status_filtered_videos,
+                                                 shield_key_name_list=config_.SHIELD_CONFIG.get('-1'))
+    log_.info('applet_filtered_videos count = {}'.format(len(applet_filtered_videos)))
+
+    # 获取视频对应分数
+    applet_redis_data = {}
+    for video_id in applet_filtered_videos:
+        applet_redis_data[video_id] = redis_data.get(video_id)
+
     # 上传redis
     key_name = config_.RECALL_KEY_NAME_PREFIX + time.strftime('%Y%m%d')
     redis_helper = RedisHelper()
-    redis_helper.add_data_with_zset(key_name=key_name, data=redis_data)
-    log_.info('data to redis finished!')
+    redis_helper.add_data_with_zset(key_name=key_name, data=applet_redis_data)
+    log_.info('applet data to redis finished!')
 
     # 清空修改ROV的视频数据
     redis_helper.del_keys(key_name=config_.UPDATE_ROV_KEY_NAME)
@@ -263,8 +276,13 @@ def predict():
 
     # ####### app应用数据更新
     # 过滤
-    app_filtered_videos = filter_video_status_app(video_ids=video_id_list)
+    app_status_filtered_videos = filter_video_status_app(video_ids=video_id_list)
+    log_.info('app_status_filtered_videos count = {}'.format(len(app_status_filtered_videos)))
+    # 屏蔽视频过滤
+    app_filtered_videos = filter_shield_video(video_ids=app_status_filtered_videos,
+                                              shield_key_name_list=config_.SHIELD_CONFIG.get('-1'))
     log_.info('app_filtered_videos count = {}'.format(len(app_filtered_videos)))
+
     # 获取视频对应分数
     app_redis_data = {}
     for video_id in app_filtered_videos:
@@ -273,7 +291,7 @@ def predict():
     redis_helper = RedisHelper()
     app_key_name = config_.RECALL_KEY_NAME_PREFIX_APP + time.strftime('%Y%m%d')
     redis_helper.add_data_with_zset(key_name=app_key_name, data=app_redis_data)
-    log_.info('app test data to redis finished!')
+    log_.info('app data to redis finished!')
     # 清空修改ROV的视频数据
     redis_helper.del_keys(key_name=config_.UPDATE_ROV_KEY_NAME_APP)
 
@@ -285,13 +303,19 @@ def predict_test():
     mysql_helper = MysqlHelper(mysql_info=config_.MYSQL_INFO)
     data = mysql_helper.get_data(sql=sql)
     video_ids = [video[0] for video in data]
+
     # 视频状态过滤
-    filtered_videos = filter_video_status(video_ids)
-    log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
+    applet_status_filtered_videos = filter_video_status(video_ids=video_ids)
+    log_.info('applet_status_filtered_videos count = {}'.format(len(applet_status_filtered_videos)))
+    # 屏蔽视频过滤
+    applet_filtered_videos = filter_shield_video(video_ids=applet_status_filtered_videos,
+                                                 shield_key_name_list=config_.SHIELD_CONFIG.get('-1'))
+    log_.info('applet_filtered_videos count = {}'.format(len(applet_filtered_videos)))
+
     # 随机生成 0-100 数作为分数
     redis_data = {}
     json_data = []
-    for video_id in filtered_videos:
+    for video_id in applet_filtered_videos:
         score = random.uniform(0, 100)
         redis_data[video_id] = score
         json_data.append({'videoId': video_id, 'rovScore': score})
@@ -318,8 +342,13 @@ def predict_test():
 
     # ####### app应用数据更新
     # 过滤
-    app_filtered_videos = filter_video_status_app(filtered_videos)
+    app_status_filtered_videos = filter_video_status_app(video_ids=applet_filtered_videos)
+    log_.info('app_status_filtered_videos count = {}'.format(len(app_status_filtered_videos)))
+    # 屏蔽视频过滤
+    app_filtered_videos = filter_shield_video(video_ids=app_status_filtered_videos,
+                                              shield_key_name_list=config_.SHIELD_CONFIG.get('-1'))
     log_.info('app_filtered_videos count = {}'.format(len(app_filtered_videos)))
+
     # 获取视频对应分数
     app_redis_data = {}
     for video_id in app_filtered_videos:
@@ -412,14 +441,14 @@ def predict_18_19():
 
 
 if __name__ == '__main__':
-    log_.info('rov model train start...')
-    train_start = time.time()
-    train_filename = config_.TRAIN_DATA_FILENAME
-    X, Y, videos, fea = process_data(filename=train_filename)
-    log_.info('X_shape = {}, Y_sahpe = {}'.format(X.shape, Y.shape))
-    train(X, Y, features=fea)
-    train_end = time.time()
-    log_.info('rov model train end, execute time = {}ms'.format((train_end - train_start)*1000))
+    # log_.info('rov model train start...')
+    # train_start = time.time()
+    # train_filename = config_.TRAIN_DATA_FILENAME
+    # X, Y, videos, fea = process_data(filename=train_filename)
+    # log_.info('X_shape = {}, Y_sahpe = {}'.format(X.shape, Y.shape))
+    # train(X, Y, features=fea)
+    # train_end = time.time()
+    # log_.info('rov model train end, execute time = {}ms'.format((train_end - train_start)*1000))
 
     log_.info('rov model predict start...')
     predict_start = time.time()

+ 1 - 1
utils.py

@@ -306,7 +306,7 @@ def filter_shield_video(video_ids, shield_key_name_list):
         if not shield_videos_list:
             continue
         shield_videos = [int(video) for video in shield_videos_list]
-        video_ids = [video_id for video_id in video_ids if video_id not in shield_videos]
+        video_ids = [int(video_id) for video_id in video_ids if int(video_id) not in shield_videos]
 
     return video_ids