12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273 |
- import pandas as pd
- import json
- import time
- from my_config import set_config
- from db_helper import RedisHelper
- from my_utils import filter_video_status
- config_, _ = set_config()
- # ##### 离线更新相关推荐强插实验数据
- def get_relevant_videos_with_excel():
- filepath = './data/relevant.xlsx'
- # startTime 和 finishTime 的数据类型指定为str
- df = pd.read_excel(filepath, converters={'startTime': str, 'finishTime': str})
- relevant_videos = {}
- head_videos = []
- for i in range(len(df)):
- dt = df.iloc[i]['dt']
- head_vid = int(df.iloc[i]['headVid'])
- order = int(df.iloc[i]['order'])
- recommend_vid = int(df.iloc[i]['recommendVid'])
- # 状态过滤
- filtered_videos = filter_video_status(video_ids=[recommend_vid])
- if filtered_videos is None or len(filtered_videos) == 0:
- continue
- # 将时间转换为10位时间戳
- start_time = int(time.mktime(time.strptime(df.iloc[i]['startTime'].split(r'.')[0], '%Y-%m-%d %H:%M:%S')))
- finish_time = int(time.mktime(time.strptime(df.iloc[i]['finishTime'].split(r'.')[0], '%Y-%m-%d %H:%M:%S')))
- item = {
- 'order': order,
- 'recommend_vid': filtered_videos[0],
- 'start_time': start_time,
- 'finish_time': finish_time
- }
- if head_vid in head_videos:
- relevant_videos[head_vid].append(item)
- else:
- relevant_videos[head_vid] = [item]
- head_videos.append(head_vid)
- print(head_videos)
- return relevant_videos, head_videos
- def update_relevant_videos_to_redis(relevant_videos, head_videos):
- if not relevant_videos:
- return
- redis_helper = RedisHelper()
- for head_vid, videos in relevant_videos.items():
- # 拼接key
- key_name = '{}{}'.format(config_.RELEVANT_VIDEOS_WITH_OP_KEY_NAME, head_vid)
- # 将数据转换为json
- videos_json = json.dumps(videos)
- # 以最晚结束的视频的结束时间 - 当前时间 + 5s 作为key的过期时间
- finish_time_list = [item['finish_time'] for item in videos]
- expire_time = max(finish_time_list) - int(time.time()) + 5
- if expire_time <= 0:
- print('head_vid = {} expire_time <= 0!'.format(head_vid))
- continue
- # 存入redis
- redis_helper.set_data_to_redis(key_name=key_name, value=videos_json, expire_time=expire_time)
- print('head_vid = {} relevant videos update finished!'.format(head_vid))
- # 将头部id存入redis中
- redis_helper.add_data_with_set(key_name=config_.RELEVANT_TOP_VIDEOS_KEY_NAME,
- values=tuple(head_videos), expire_time=24*3600)
- print('relevant top videos update finished!')
- if __name__ == '__main__':
- relevant_videos, head_videos = get_relevant_videos_with_excel()
- update_relevant_videos_to_redis(relevant_videos=relevant_videos, head_videos=head_videos)
|