import pandas as pd import json import time from config import set_config from db_helper import RedisHelper config_, _ = set_config() # ##### 离线更新相关推荐强插实验数据 def get_relevant_videos_with_excel(): filepath = './data/relevant.xlsx' # startTime 和 finishTime 的数据类型指定为str df = pd.read_excel(filepath, converters={'startTime': str, 'finishTime': str}) relevant_videos = {} head_videos = [] for i in range(len(df)): dt = df.iloc[i]['dt'] head_vid = int(df.iloc[i]['headVid']) order = int(df.iloc[i]['order']) recommend_vid = int(df.iloc[i]['recommendVid']) # 将时间转换为10位时间戳 start_time = int(time.mktime(time.strptime(df.iloc[i]['startTime'].split(r'.')[0], '%Y-%m-%d %H:%M:%S'))) finish_time = int(time.mktime(time.strptime(df.iloc[i]['finishTime'].split(r'.')[0], '%Y-%m-%d %H:%M:%S'))) item = { 'order': order, 'recommend_vid': recommend_vid, 'start_time': start_time, 'finish_time': finish_time } if head_vid in head_videos: relevant_videos[head_vid].append(item) else: relevant_videos[head_vid] = [item] head_videos.append(head_vid) print(head_videos) return relevant_videos def update_relevant_videos_to_redis(relevant_videos): if not relevant_videos: return for head_vid, videos in relevant_videos.items(): # 拼接key key_name = '{}{}'.format(config_.RELEVANT_VIDEOS_WITH_OP_KEY_NAME, head_vid) # 将数据转换为json videos_json = json.dumps(videos) # 以最晚结束的视频的结束时间 - 当前时间 + 5s 作为key的过期时间 finish_time_list = [item['finish_time'] for item in videos] expire_time = max(finish_time_list) - int(time.time()) + 5 if expire_time <= 0: return # 存入redis redis_helper = RedisHelper() redis_helper.set_data_to_redis(key_name=key_name, value=videos_json, expire_time=expire_time) print('head_vid = {} relevant videos update finished!'.format(head_vid)) if __name__ == '__main__': relevant_videos = get_relevant_videos_with_excel() update_relevant_videos_to_redis(relevant_videos=relevant_videos)