# calHourTypeCtr.py
  1. #coding utf-8
  2. import sys
  3. from operator import itemgetter
  4. import json
  5. import pandas as pd
  6. #from db_help import RedisHelper
  7. if __name__=="__main__":
  8. #1.load data
  9. nowdate=sys.argv[1]
  10. vlog='0'
  11. love_live = 4
  12. data_path = "./data/hour_video_data_"+nowdate
  13. f = open(data_path)
  14. #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
  15. #print(data)
  16. index = 0
  17. data_dict = {}
  18. #redis_helper = RedisHelper()
  19. while True:
  20. line = f.readline()
  21. if not line:
  22. break
  23. if index==0:
  24. index += 1
  25. continue
  26. index +=1
  27. items = line.strip().split("\t")
  28. #print(items)
  29. if len(items)<9:
  30. continue
  31. apptype = items[0]
  32. vid = items[1]
  33. view_users = int(items[2])
  34. view_pv = int(items[3])
  35. play_users = int(items[4])
  36. play_pv = int(items[5])
  37. share_users = int(items[6])
  38. share_pv = int(items[7])
  39. return_users = int(items[8])
  40. #print(items)
  41. app_vid = apptype+"\t"+vid
  42. if app_vid not in data_dict:
  43. data_dict[app_vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
  44. else:
  45. item_info = data_dict[app_vid]
  46. view_users = item_info[0]+view_users
  47. view_pv = item_info[1]+view_pv
  48. play_users = item_info[2]+play_users
  49. play_pv = item_info[3]+play_pv
  50. share_users = item_info[4]+share_users
  51. share_pv = item_info[5]+share_pv
  52. return_users = item_info[6]+return_users
  53. data_dict[app_vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
  54. #print(data_dict.items())
  55. f.close()
  56. info_dict = {}
  57. hour_data_path = "./data/sorted_type_hour_data_"+nowdate
  58. f = open(hour_data_path, 'w')
  59. for k, v in data_dict.items():
  60. #print(v)
  61. return_users = v[6]
  62. #print(return_users)
  63. view_users = v[0]
  64. view_pv = v[1]
  65. share_pv = v[5]
  66. share_users = v[4]
  67. play_users = v[2]
  68. play_pv = v[3]
  69. #print("return_users:", return_users)
  70. k_score = float(return_users)/(float(view_users)+5)
  71. k_score2 = float(return_users)/(float(view_pv)+5)
  72. #print(k_score)
  73. share_score = float(share_pv)/(float(view_pv)+5)
  74. backrate = float(return_users)/(float(view_users)+5)
  75. #print(k, k_score, share_score*backrate, share_score, backrate)
  76. score_info = [k_score2, share_score*backrate, share_score, backrate, view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users]
  77. score_info = json.dumps(score_info)
  78. info_dict[k] = score_info
  79. f.write(k+"\t"+score_info+"\n")
  80. #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15)
  81. f.close()