|
@@ -1,14 +1,15 @@
|
|
|
import datetime
|
|
|
-import random
|
|
|
+import pandas as pd
|
|
|
from odps import ODPS
|
|
|
from threading import Timer
|
|
|
from get_data import get_data_from_odps
|
|
|
-from db_helper import RedisHelper, MysqlHelper
|
|
|
+from db_helper import RedisHelper
|
|
|
from config import set_config
|
|
|
from log import Log
|
|
|
|
|
|
config_, env = set_config()
|
|
|
log_ = Log()
|
|
|
+features = ['videoid', 'play_count', 'dt']
|
|
|
|
|
|
|
|
|
def data_check(project, table, now_date):
|
|
@@ -39,16 +40,26 @@ def get_religion_videos(now_date, project, table):
|
|
|
# 获取videoId
|
|
|
dt = datetime.datetime.strftime(now_date, '%Y%m%d')
|
|
|
records = get_data_from_odps(date=dt, project=project, table=table)
|
|
|
- video_id_list = [record['videoid'] for record in records]
|
|
|
- # 排序合并,随机给定分数
|
|
|
+ feature_data = []
|
|
|
+ for record in records:
|
|
|
+ item = {}
|
|
|
+ for feature_name in features:
|
|
|
+ item[feature_name] = record[feature_name]
|
|
|
+ feature_data.append(item)
|
|
|
+ feature_df = pd.DataFrame(feature_data)
|
|
|
+ # 按照发布时间和播放量进行倒序
|
|
|
+ feature_df = feature_df.sort_values(by=['dt', 'play_count'], ascending=False)
|
|
|
+ video_id_list = feature_df['videoid'].to_list()
|
|
|
+ # 按照排序给定分数
|
|
|
final_result = {}
|
|
|
- for video_id in video_id_list:
|
|
|
- score = random.uniform(0, 100)
|
|
|
+ step = 100 / (len(video_id_list) * 2)
|
|
|
+ for i, video_id in enumerate(video_id_list):
|
|
|
+ score = 100 - i * step
|
|
|
final_result[int(video_id)] = score
|
|
|
|
|
|
# 写入对应的redis
|
|
|
key_name = \
|
|
|
- f"{config_.KEY_NAME_PREFIX_SPECIAL_VIDEOS}{datetime.datetime.strftime(now_date, '%Y%m%d')}"
|
|
|
+ f"{config_.KEY_NAME_PREFIX_RELIGION_VIDEOS}{datetime.datetime.strftime(now_date, '%Y%m%d')}"
|
|
|
if len(final_result) > 0:
|
|
|
redis_helper = RedisHelper()
|
|
|
redis_helper.add_data_with_zset(key_name=key_name, data=final_result, expire_time=2 * 24 * 3600)
|