# compose_score.py
import sys
import pandas as pd
from db_helper import RedisHelper

# Module-level RedisHelper instance, created once at import time and used by
# score_to_redis() for its batched key writes.
redis_helper = RedisHelper()
  5. def cal_compose_score(score_hour_path, score_24h_path):
  6. """分值合并"""
  7. score_hour_df = pd.read_csv(score_hour_path)
  8. score_24h_df = pd.read_csv(score_24h_path)
  9. print(score_hour_df)
  10. print(score_24h_df)
  11. score_hour_df['videoid'] = score_hour_df['videoid'].astype(int)
  12. score_24h_df['videoid'] = score_24h_df['videoid'].astype(int)
  13. score_merge_df = pd.merge(score_hour_df, score_24h_df, on='videoid', how='outer')
  14. score_merge_df.fillna(0, inplace=True)
  15. print(score_merge_df)
  16. print(score_hour_df.shape)
  17. print(score_24h_df.shape)
  18. print(score_merge_df.shape)
  19. score_merge_df['score1'] = score_merge_df['24h_score1'] + score_merge_df['hour_score1']
  20. score_merge_df['score2'] = score_merge_df['24h_score1'] + score_merge_df['hour_score2']
  21. score_merge_df['score3'] = score_merge_df['24h_score1'] + score_merge_df['hour_score3']
  22. score_merge_df['score4'] = score_merge_df['24h_score1'] + score_merge_df['hour_score4']
  23. score_merge_df['score5'] = score_merge_df['24h_score1'] + score_merge_df['hour_score5']
  24. print(score_merge_df)
  25. print(score_merge_df.shape)
  26. score_df = score_merge_df[['videoid', 'score1', 'score2', 'score3', 'score4', 'score5']]
  27. return score_df
  28. def score_to_redis(score_df):
  29. redis_data = dict()
  30. rank_score_key_prefix = 'rank:'
  31. score_name_list = score_df.columns.to_list()[1:]
  32. for ind, row in score_df.iterrows():
  33. video_id = int(row['videoid'])
  34. for score_name in score_name_list:
  35. score = row[score_name]
  36. rank_score_key = f"{rank_score_key_prefix}{score_name}:{video_id}"
  37. redis_data[rank_score_key] = score
  38. print(rank_score_key, score)
  39. # redis_helper.set_data_to_redis(key_name=rank_score_key, value=score, expire_time=24*60*60)
  40. if ind % 1000 == 0:
  41. if len(redis_data) > 0:
  42. print(ind, len(redis_data))
  43. redis_helper.update_batch_set_key(data=redis_data, expire_time=24*60*60)
  44. redis_data = {}
  45. if len(redis_data) > 0:
  46. redis_helper.update_batch_set_key(data=redis_data, expire_time=24 * 60 * 60)
  47. print(len(redis_data))
  48. if __name__ == '__main__':
  49. now_date = sys.argv[1]
  50. print("now date:", now_date)
  51. score_hour_path = f"./data/hour_score_{now_date}.csv"
  52. score_24h_path = f"./data/24h_score_{now_date}.csv"
  53. score_df = cal_compose_score(score_hour_path=score_hour_path, score_24h_path=score_24h_path)
  54. score_to_redis(score_df=score_df)