|
@@ -4,17 +4,20 @@
|
|
|
# @Time: 2022/4/21 下午4:31
|
|
|
# @Software: PyCharm
|
|
|
|
|
|
+import time
|
|
|
import datetime
|
|
|
import pandas as pd
|
|
|
import math
|
|
|
+import random
|
|
|
from odps import ODPS
|
|
|
from threading import Timer
|
|
|
from get_data import get_data_from_odps
|
|
|
-from db_helper import RedisHelper
|
|
|
+from db_helper import RedisHelper, MysqlHelper
|
|
|
from config import set_config
|
|
|
from log import Log
|
|
|
+from utils import filter_video_status
|
|
|
|
|
|
-config_, _ = set_config()
|
|
|
+config_, env = set_config()
|
|
|
log_ = Log()
|
|
|
|
|
|
features = [
|
|
@@ -186,11 +189,57 @@ def h_timer_check(app_type):
|
|
|
Timer(60, h_timer_check).start()
|
|
|
|
|
|
|
|
|
-if __name__ == '__main__':
|
|
|
- # df1 = get_feature_data()
|
|
|
- # res = cal_score(df=df1)
|
|
|
- # video_rank(df=res, now_date=datetime.datetime.today())
|
|
|
- # rank_by_h()
|
|
|
- app_type_list = [18, 19]
|
|
|
def predict(app_type_list):
    """Call ``h_timer_check`` once for every app type, in list order.

    :param app_type_list: iterable of app type identifiers; each one is
        passed to ``h_timer_check`` as the ``app_type`` keyword argument.
    """
    for current_app_type in app_type_list:
        h_timer_check(app_type=current_app_type)
|
|
|
+
|
|
|
+
|
|
|
def predict_test(app_type_list):
    """Produce random placeholder scores for each app type.

    Reads the 40000 highest video ids from ``wx_video``, filters them with
    ``filter_video_status`` and then, for every app type, randomly picks up
    to 300 of the remaining ids, gives each a uniform random score in
    [0, 100], writes the scores to ``./data/score_<app_type>_<YYYYmmddHH>.csv``
    and pushes them to Redis through ``RedisHelper.add_data_with_zset``.

    :param app_type_list: iterable of app type identifiers.
    """
    now_date = datetime.datetime.today()
    # Take the hour from the same timestamp so the CSV file name and the
    # redis key can never disagree when the run straddles an hour boundary.
    now_h = now_date.hour
    date_hour_str = now_date.strftime('%Y%m%d%H')
    date_str = now_date.strftime('%Y%m%d')
    log_.info(f"now_date = {date_hour_str}, now_h = {now_h}")

    # Fetch the 40000 most recently published videos in the test environment
    # (the query takes the rows with the highest ids).
    sql = "SELECT id FROM wx_video ORDER BY id DESC LIMIT 40000;"
    mysql_helper = MysqlHelper(mysql_info=config_.MYSQL_INFO)
    data = mysql_helper.get_data(sql=sql)
    video_ids = [video[0] for video in data]

    # Filter by video status.
    filtered_videos = filter_video_status(video_ids)
    log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
    if not filtered_videos:
        # Nothing to sample from: skip writing empty CSV files / redis keys.
        log_.info('no videos left after status filtering, skip predict_test')
        return

    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the sample size at what is available.
    sample_size = min(300, len(filtered_videos))

    for app_type in app_type_list:
        log_.info(f"app_type = {app_type}")
        videos_temp = random.sample(filtered_videos, sample_size)
        redis_data_temp = {}
        csv_data_temp = []
        for video_id in videos_temp:
            score = random.uniform(0, 100)
            redis_data_temp[video_id] = score
            csv_data_temp.append({'video_id': video_id, 'rov_score': score})

        # Pack the prediction results and save them as csv, highest score first.
        score_df = pd.DataFrame(data=csv_data_temp, columns=['video_id', 'rov_score'])
        score_df = score_df.sort_values(by=['rov_score'], ascending=False)
        score_filename = f"score_{app_type}_{date_hour_str}.csv"
        score_df.to_csv(f'./data/{score_filename}', index=False)

        # Store the scores under the redis key for this app type, date and hour.
        key_name = f"{config_.RECALL_KEY_NAME_PREFIX_APP_TYPE}{app_type}.{date_str}.{now_h}"
        redis_helper = RedisHelper()
        redis_helper.add_data_with_zset(key_name=key_name, data=redis_data_temp)
        log_.info('data to redis finished!')
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Script entry point: pick the prediction routine by environment and log
    # how long the run took.
    app_type_list = [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]
    log_.info(f'appType: {app_type_list} predict start...')
    predict_start = time.time()
    if env in ['dev', 'test']:
        predict_test(app_type_list=app_type_list)
    elif env in ['pre', 'pro']:
        predict(app_type_list=app_type_list)
    else:
        log_.error('env error')
    predict_end = time.time()
    # Log the app types actually processed instead of a hard-coded "[18, 19]",
    # so the message stays correct if the configured values change.
    log_.info('appType: {} predict end, execute time = {}ms'.format(
        app_type_list, (predict_end - predict_start) * 1000))
|
|
|
+
|