relevant_top_videos.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. import pandas as pd
  2. import json
  3. import time
  4. from my_config import set_config
  5. from db_helper import RedisHelper
  6. from my_utils import filter_video_status
  7. config_, _ = set_config()
  8. # ##### 离线更新相关推荐强插实验数据
  9. def get_relevant_videos_with_excel():
  10. filepath = './data/relevant.xlsx'
  11. # startTime 和 finishTime 的数据类型指定为str
  12. df = pd.read_excel(filepath, converters={'startTime': str, 'finishTime': str})
  13. relevant_videos = {}
  14. head_videos = []
  15. for i in range(len(df)):
  16. dt = df.iloc[i]['dt']
  17. head_vid = int(df.iloc[i]['headVid'])
  18. order = int(df.iloc[i]['order'])
  19. recommend_vid = int(df.iloc[i]['recommendVid'])
  20. # 状态过滤
  21. filtered_videos = filter_video_status(video_ids=[recommend_vid])
  22. if filtered_videos is None or len(filtered_videos) == 0:
  23. continue
  24. # 将时间转换为10位时间戳
  25. start_time = int(time.mktime(time.strptime(df.iloc[i]['startTime'].split(r'.')[0], '%Y-%m-%d %H:%M:%S')))
  26. finish_time = int(time.mktime(time.strptime(df.iloc[i]['finishTime'].split(r'.')[0], '%Y-%m-%d %H:%M:%S')))
  27. item = {
  28. 'order': order,
  29. 'recommend_vid': filtered_videos[0],
  30. 'start_time': start_time,
  31. 'finish_time': finish_time
  32. }
  33. if head_vid in head_videos:
  34. relevant_videos[head_vid].append(item)
  35. else:
  36. relevant_videos[head_vid] = [item]
  37. head_videos.append(head_vid)
  38. print(head_videos)
  39. return relevant_videos, head_videos
  40. def update_relevant_videos_to_redis(relevant_videos, head_videos):
  41. if not relevant_videos:
  42. return
  43. redis_helper = RedisHelper()
  44. for head_vid, videos in relevant_videos.items():
  45. # 拼接key
  46. key_name = '{}{}'.format(config_.RELEVANT_VIDEOS_WITH_OP_KEY_NAME, head_vid)
  47. # 将数据转换为json
  48. videos_json = json.dumps(videos)
  49. # 以最晚结束的视频的结束时间 - 当前时间 + 5s 作为key的过期时间
  50. finish_time_list = [item['finish_time'] for item in videos]
  51. expire_time = max(finish_time_list) - int(time.time()) + 5
  52. if expire_time <= 0:
  53. print('head_vid = {} expire_time <= 0!'.format(head_vid))
  54. continue
  55. # 存入redis
  56. redis_helper.set_data_to_redis(key_name=key_name, value=videos_json, expire_time=expire_time)
  57. print('head_vid = {} relevant videos update finished!'.format(head_vid))
  58. # 将头部id存入redis中
  59. redis_helper.add_data_with_set(key_name=config_.RELEVANT_TOP_VIDEOS_KEY_NAME,
  60. values=tuple(head_videos), expire_time=24*3600)
  61. print('relevant top videos update finished!')
  62. if __name__ == '__main__':
  63. relevant_videos, head_videos = get_relevant_videos_with_excel()
  64. update_relevant_videos_to_redis(relevant_videos=relevant_videos, head_videos=head_videos)