Explorar o código

add appType in [18, 19] rov predict

liqian hai 3 anos
pai
achega
ff0137d4fd
Modificáronse 4 ficheiros con 96 adicións e 1 borrados
  1. 20 1
      config.py
  2. 14 0
      get_data.py
  3. 55 0
      rov_train.py
  4. 7 0
      videos_filter.py

+ 20 - 1
config.py

@@ -63,6 +63,22 @@ class BaseConfig(object):
     # 预测数据文件存放路径
     PREDICT_DATA_FILENAME = 'predict_data.pickle'
 
+    # app_type: [18, 19]预测表名
+    PREDICT_PROJECT_18_19 = {
+        '18': '',
+        '19': ''
+    }
+    PREDICT_TABLE_18_19 = {
+        '18': '',
+        '19': ''
+    }
+    # 预测数据文件存放路径
+    PREDICT_DATA_FILENAME_18_19 = {
+        '18': 'predict_data_18.pickle',
+        '19': 'predict_data_19.pickle'
+    }
+
+
     # 模型存放文件
     MODEL_FILENAME = 'model.pickle'
 
@@ -87,8 +103,11 @@ class BaseConfig(object):
     # app应用 小时级数据更新最终结果存放 redis key前缀,完整格式:com.weiqu.video.recall.hot.item.score.app.{date}.{h}
     APP_FINAL_RECALL_KEY_NAME_PREFIX = 'com.weiqu.video.recall.hot.item.score.app.'
 
+    # appType:[18, 19]小程序离线ROV模型结果存放 redis key前缀,完整格式:com.weiqu.video.recall.hot.item.score.{appType}.{date}
+    RECALL_KEY_NAME_PREFIX_APP_TYPE = 'com.weiqu.video.recall.hot.item.score.'
+
     # appType = 6, ROV召回池redis key前缀,完整格式:com.weiqu.video.recall.hot.apptype.h.item.score.{appType}.{h}
-    RECALL_KEY_NAME_PREFIX_APP_TYPE = 'com.weiqu.video.recall.hot.apptype.h.item.score.'
+    # RECALL_KEY_NAME_PREFIX_APP_TYPE = 'com.weiqu.video.recall.hot.apptype.h.item.score.'
 
     # 流量池离线模型结果存放 redis key前缀,完整格式 com.weiqu.video.flowpool.hot.item.score.{appType}
     FLOWPOOL_KEY_NAME_PREFIX = 'com.weiqu.video.flowpool.hot.item.score.'

+ 14 - 0
get_data.py

@@ -92,6 +92,20 @@ def get_train_predict_data():
     write_to_pickle(data=predict_data, filename=config_.PREDICT_DATA_FILENAME)
     log_.info('predict data finished, shape={}'.format(predict_data.shape))
 
+    # ###### app_type: [18, 19]预测数据
+    for app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
+        log_.info(f"app_type = {app_type}")
+        project = config_.PREDICT_PROJECT_18_19[str(app_type)]
+        table = config_.PREDICT_TABLE_18_19[str(app_type)]
+        predict_data = get_data_with_date(
+            date=predict_date,
+            delta_days=config_.PREDICT_DELTA_DAYS,
+            project=project,
+            table=table
+        )
+        write_to_pickle(data=predict_data, filename=config_.PREDICT_DATA_FILENAME_18_19[str(app_type)])
+        log_.info(f'predict data finished, app_type = {app_type}, shape={predict_data.shape}')
+
 
 if __name__ == '__main__':
     get_train_predict_data()

+ 55 - 0
rov_train.py

@@ -333,6 +333,60 @@ def predict_test():
     redis_helper.del_keys(key_name=config_.UPDATE_ROV_KEY_NAME_APP)
 
 
+def predict_18_19():
+    """Run prediction for app_type: [18, 19]; per app_type, write a csv of ranked scores and a redis zset."""
+    for app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
+        log_.info(f"app_type = {app_type}")
+        # Load the prediction data for this app_type and clean it
+        predict_data_filename = config_.PREDICT_DATA_FILENAME_18_19[str(app_type)]
+        x, video_ids = process_predict_data(predict_data_filename)
+        log_.info('predict data shape: x = {}'.format(x.shape))
+        # Load the trained model (the same MODEL_FILENAME is read for every app_type)
+        model = read_from_pickle(filename=config_.MODEL_FILENAME)
+        # Predict
+        y_ = model.predict(x)
+        log_.info('predict finished!')
+
+        # Normalize the results to [0, 100]
+        normal_y_ = data_normalization(list(y_))
+        log_.info('normalization finished!')
+
+        # Sort by normal_y_ in descending order
+        predict_data = []
+        for i, video_id in enumerate(video_ids):
+            data = {'video_id': video_id, 'normal_y_': normal_y_[i], 'y_': y_[i]}
+            predict_data.append(data)
+        predict_data_sorted = sorted(predict_data, key=lambda temp: temp['normal_y_'], reverse=True)
+
+        # Following the sort order, count down from 100 in fixed steps of ROV_SCORE_D; that value is the rovScore
+        predict_result = []
+        redis_data = {}
+        json_data = []
+        video_id_list = []
+        for j, item in enumerate(predict_data_sorted):
+            video_id = int(item['video_id'])
+            rov_score = 100 - j * config_.ROV_SCORE_D
+            item['rov_score'] = rov_score
+            predict_result.append(item)
+            redis_data[video_id] = rov_score
+            json_data.append({'videoId': video_id, 'rovScore': rov_score})
+            video_id_list.append(video_id)
+
+        # Pack the prediction results and save them to a csv file
+        predict_result_filename = f'predict_{app_type}.csv'
+        pack_list_result_to_csv(filename=predict_result_filename,
+                                data=predict_result,
+                                columns=['video_id', 'rov_score', 'normal_y_', 'y_'],
+                                sort_columns=['rov_score'],
+                                ascending=False)
+
+        # Upload to redis under key {prefix}{app_type}.{YYYYmmdd}
+        key_name = f"{config_.RECALL_KEY_NAME_PREFIX_APP_TYPE}{app_type}.{time.strftime('%Y%m%d')}"
+        redis_helper = RedisHelper()
+        redis_helper.add_data_with_zset(key_name=key_name, data=redis_data)
+        log_.info('data to redis finished!')
+
+
 if __name__ == '__main__':
     log_.info('rov model train start...')
     train_start = time.time()
@@ -349,6 +403,7 @@ if __name__ == '__main__':
         predict_test()
     elif env in ['pre', 'pro']:
         predict()
+        predict_18_19()
     else:
         log_.error('env error')
     predict_end = time.time()

+ 7 - 0
videos_filter.py

@@ -296,6 +296,9 @@ def get_pool_redis_key(pool_type, app_type=None):
             # appType = 13  票圈视频app
             if app_type == config_.APP_TYPE['APP']:
                 key_name_prefix = config_.RECALL_KEY_NAME_PREFIX_APP
+            # appType: [18, 19]
+            elif app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
+                key_name_prefix = f'{config_.RECALL_KEY_NAME_PREFIX_APP_TYPE}{app_type}.'
             # 其他
             else:
                 key_name_prefix = config_.RECALL_KEY_NAME_PREFIX
@@ -416,6 +419,10 @@ def main():
         filter_rov_pool(app_type=config_.APP_TYPE['SHORT_VIDEO'])
         # appType = 13,票圈视频APP视频过滤
         filter_rov_pool(app_type=config_.APP_TYPE['APP'])
+        # appType = 18, ROV召回池视频过滤
+        filter_rov_pool(app_type=config_.APP_TYPE['LAO_HAO_KAN_VIDEO'])
+        # appType = 19, ROV召回池视频过滤
+        filter_rov_pool(app_type=config_.APP_TYPE['ZUI_JING_QI'])
         # 流量池视频过滤
         filter_flow_pool()
         # 兜底视频过滤