get3HotRecall.py 3.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. # -*- coding: utf-8 -*-
  2. import sys
  3. from operator import itemgetter
  4. import json
  5. import pandas as pd
  6. from db_help import RedisHelper
  7. if __name__=="__main__":
  8. #1.load data
  9. nowdate=sys.argv[1]
  10. vlog='0'
  11. love_live = 4
  12. data_path = "./data/3_days_video_data_"+nowdate
  13. f = open(data_path)
  14. #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
  15. #print(data)
  16. index = 0
  17. data_dict = {}
  18. redis_helper = RedisHelper()
  19. while True:
  20. line = f.readline()
  21. if not line:
  22. break
  23. if index==0:
  24. index += 1
  25. continue
  26. index +=1
  27. items = line.strip().split("\t")
  28. if len(items)<11:
  29. continue
  30. vid = int(items[1])
  31. view_users = int(items[4])
  32. view_pv = int(items[5])
  33. play_users = int(items[6])
  34. play_pv = int(items[7])
  35. share_users = int(items[8])
  36. share_pv = int(items[9])
  37. return_users = int(items[10])
  38. #print(items)
  39. if vid not in data_dict:
  40. data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
  41. else:
  42. item_info = data_dict[vid]
  43. view_users = item_info[0]+view_users
  44. view_pv = item_info[1]+view_pv
  45. play_users = item_info[2]+play_pv
  46. share_users = item_info[3]+share_users
  47. share_pv = item_info[4]+share_pv
  48. return_users = item_info[5]+return_users
  49. data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
  50. #print(data_dict.items())
  51. info_dict = {}
  52. data_path = "./data/3_days_recall_hot_"+nowdate
  53. f = open(data_path, 'w')
  54. for k, v in data_dict.items():
  55. #print(v)
  56. return_users = v[6]
  57. #print(return_users)
  58. view_users = v[0]
  59. view_pv = v[1]
  60. if view_pv<100 or view_users<10:
  61. continue
  62. share_pv = v[5]
  63. share_users = v[4]
  64. play_users = v[2]
  65. play_pv = v[3]
  66. #print("return_users:", return_users)
  67. k_score = float(return_users)/(float(view_users)+30)
  68. #print(k_score)
  69. share_score = float(share_pv)/(float(view_pv)+100)
  70. backrate = float(return_users)/(float(view_users)+30)
  71. ctr_score = float(play_pv)/float(view_pv+100)
  72. if ctr_score<=0.5:
  73. continue
  74. #print(k, k_score, share_score*backrate, share_score, backrate)
  75. score_info = [k_score, share_score*backrate, share_score, backrate, ctr_score, view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users]
  76. info_dict[k] = score_info
  77. sorted_v = sorted(info_dict.items(), key=lambda s:s[1][1], reverse=True)
  78. print("sorted_v:", sorted_v[:30])
  79. recall_name = "hot_3day:"
  80. hot_recall_res = []
  81. for item in sorted_v[:5]:
  82. hot_recall_res.append((item[0], item[1][1]))
  83. if len(hot_recall_res)>5:
  84. score_info = json.dumps(hot_recall_res)
  85. print("score_info:", score_info)
  86. redis_helper.set_data_to_redis(recall_name, score_info, 60*60*24*15)
  87. f.write(recall_name+"\t"+score_info+"\n")
  88. f.close()
  89. #info_dict[k] = score_info
  90. #f.write(k+"\t"+score_info+"\n")
  91. #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15)
  92. #f.close()