|
@@ -0,0 +1,80 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+import sys
|
|
|
+from operator import itemgetter
|
|
|
+import json
|
|
|
+import pandas as pd
|
|
|
+#from db_help import RedisHelper
|
|
|
+
|
|
|
# Positions of the seven counters inside an aggregated per-video list.  They
# follow the order of input columns 2..8 (column 0 is never read, column 1 is
# the video id).
(VIEW_USERS, VIEW_PV, PLAY_USERS, PLAY_PV,
 SHARE_USERS, SHARE_PV, RETURN_USERS) = range(7)

# A row needs at least this many tab-separated fields to be usable.
MIN_FIELDS = 9

# Constant added to every ratio denominator; with non-negative counters it
# keeps the divisions away from zero.
SMOOTHING = 5


def aggregate_video_stats(lines):
    """Sum the seven per-video counters over tab-separated rows.

    Args:
        lines: iterable of text rows (an open file works).  The first row is
            treated as a header and skipped.

    Returns:
        dict mapping video id (field 1) to a list of seven floats ordered as
        VIEW_USERS .. RETURN_USERS, in first-seen order of the ids.

    Rows with fewer than MIN_FIELDS fields are ignored silently, as before.
    Rows whose counters are not numeric are skipped with a warning on stderr.
    """
    totals = {}
    rows = iter(lines)
    next(rows, None)  # drop the header row
    for line_no, line in enumerate(rows, start=2):
        fields = line.strip().split("\t")
        if len(fields) < MIN_FIELDS:
            continue
        vid = fields[1]
        try:
            # Convert once at the edge: the fields are text, and adding them
            # unconverted would concatenate digits instead of summing.
            counts = [float(raw) for raw in fields[2:MIN_FIELDS]]
        except ValueError:
            sys.stderr.write(
                "skipping line %d: non-numeric counter in %r\n" % (line_no, line)
            )
            continue
        running = totals.get(vid)
        if running is None:
            totals[vid] = counts
        else:
            # Element-wise sum keeps every counter aligned with its own column.
            for pos, value in enumerate(counts):
                running[pos] += value
    return totals


def score_video(stats):
    """Return ``[share_score, share_score * backrate, share_score, backrate]``.

    Args:
        stats: list of seven counters as produced by aggregate_video_stats.

    share_score is share_pv / (view_pv + SMOOTHING) and backrate is
    return_users / (share_pv + SMOOTHING).

    NOTE(review): the first slot repeats share_score.  The original script
    also computed return_users / (view_users + 5) ("k_score") without using
    it; confirm whether that value was meant to go in slot 0.
    """
    share_pv = stats[SHARE_PV]
    share_score = share_pv / (stats[VIEW_PV] + SMOOTHING)
    backrate = stats[RETURN_USERS] / (share_pv + SMOOTHING)
    return [share_score, share_score * backrate, share_score, backrate]


def main(argv):
    """Read ./data/hour_video_data_<date>, write ./data/sorted_hour_data_<date>.

    Args:
        argv: command-line vector; argv[1] is the date suffix of both files.

    Each output line is ``<vid>\\t<json list of scores>``.  Despite the file
    name, rows are written in first-seen order of the video ids, not sorted.
    The Redis publication step (RedisHelper.update_batch_setnx_key) was
    commented out in the original script and is not performed here.
    """
    if len(argv) < 2:
        sys.exit("usage: %s <date>" % argv[0])
    nowdate = argv[1]
    data_path = "./data/hour_video_data_" + nowdate
    hour_data_path = "./data/sorted_hour_data_" + nowdate

    # 1. load and aggregate; `with` closes the file even if parsing fails
    with open(data_path) as src:
        totals = aggregate_video_stats(src)

    # 2. score every video and dump the result
    with open(hour_data_path, "w") as dst:
        for vid, stats in totals.items():
            dst.write(vid + "\t" + json.dumps(score_video(stats)) + "\n")


if __name__ == "__main__":
    main(sys.argv)
|
|
|
+
|
|
|
+
|