import pandas as pd import json import time from config import set_config from db_helper import RedisHelper from utils import filter_video_status config_, _ = set_config() # ##### 离线更新相关推荐强插实验数据 def get_relevant_videos_with_excel(): filepath = './data/relevant.xlsx' # startTime 和 finishTime 的数据类型指定为str df = pd.read_excel(filepath, converters={'startTime': str, 'finishTime': str}) relevant_videos = {} head_videos = [] for i in range(len(df)): dt = df.iloc[i]['dt'] head_vid = int(df.iloc[i]['headVid']) order = int(df.iloc[i]['order']) recommend_vid = int(df.iloc[i]['recommendVid']) # 状态过滤 filtered_videos = filter_video_status(video_ids=[recommend_vid]) if filtered_videos is None or len(filtered_videos) == 0: continue # 将时间转换为10位时间戳 start_time = int(time.mktime(time.strptime(df.iloc[i]['startTime'].split(r'.')[0], '%Y-%m-%d %H:%M:%S'))) finish_time = int(time.mktime(time.strptime(df.iloc[i]['finishTime'].split(r'.')[0], '%Y-%m-%d %H:%M:%S'))) item = { 'order': order, 'recommend_vid': filtered_videos[0], 'start_time': start_time, 'finish_time': finish_time } if head_vid in head_videos: relevant_videos[head_vid].append(item) else: relevant_videos[head_vid] = [item] head_videos.append(head_vid) print(head_videos) return relevant_videos, head_videos def update_relevant_videos_to_redis(relevant_videos, head_videos): if not relevant_videos: return redis_helper = RedisHelper() for head_vid, videos in relevant_videos.items(): # 拼接key key_name = '{}{}'.format(config_.RELEVANT_VIDEOS_WITH_OP_KEY_NAME, head_vid) # 将数据转换为json videos_json = json.dumps(videos) # 以最晚结束的视频的结束时间 - 当前时间 + 5s 作为key的过期时间 finish_time_list = [item['finish_time'] for item in videos] expire_time = max(finish_time_list) - int(time.time()) + 5 if expire_time <= 0: print('head_vid = {} expire_time <= 0!'.format(head_vid)) continue # 存入redis redis_helper.set_data_to_redis(key_name=key_name, value=videos_json, expire_time=expire_time) print('head_vid = {} relevant videos update finished!'.format(head_vid)) # 将头部id存入redis中 redis_helper.add_data_with_set(key_name=config_.RELEVANT_TOP_VIDEOS_KEY_NAME, values=tuple(head_videos), expire_time=24*3600) print('relevant top videos update finished!') if __name__ == '__main__': relevant_videos, head_videos = get_relevant_videos_with_excel() update_relevant_videos_to_redis(relevant_videos=relevant_videos, head_videos=head_videos)