|
@@ -0,0 +1,95 @@
|
|
|
|
+#coding utf-8
|
|
|
|
+import sys
|
|
|
|
+from operator import itemgetter
|
|
|
|
+import json
|
|
|
|
+import pandas as pd
|
|
|
|
+from db_help import RedisHelper
|
|
|
|
+
|
|
|
|
# Layout of one tab-separated data row: column 1 holds the video id and
# columns 4..10 hold seven integer counters, in this order:
# view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users.
VID_COLUMN = 1
FIRST_COUNTER_COLUMN = 4
MIN_COLUMNS = 11

# Additive smoothing terms used in the rate denominators below.
USER_SMOOTHING = 30
PV_SMOOTHING = 100


def load_video_stats(path):
    """Read a tab-separated stats file and sum the counters per video id.

    The first line of the file is skipped (treated as a header).  Rows with
    fewer than ``MIN_COLUMNS`` fields are ignored.  When a video id occurs on
    several rows, each of its seven counters is summed position by position.

    Args:
        path: path of the tab-separated input file.

    Returns:
        dict mapping ``int`` video id to a 7-tuple
        ``(view_users, view_pv, play_users, play_pv, share_users, share_pv,
        return_users)``.

    Raises:
        ValueError: if the id or a counter field of a full-width row is not
            an integer.
    """
    totals = {}
    with open(path) as src:
        next(src, None)  # skip the header line
        for line in src:
            fields = line.strip().split("\t")
            if len(fields) < MIN_COLUMNS:
                continue
            vid = int(fields[VID_COLUMN])
            counts = tuple(
                int(value)
                for value in fields[FIRST_COUNTER_COLUMN:MIN_COLUMNS]
            )
            seen = totals.get(vid)
            if seen is None:
                totals[vid] = counts
            else:
                # Element-wise sum keeps every counter in its own slot.
                totals[vid] = tuple(a + b for a, b in zip(seen, counts))
    return totals


def score_videos(stats, min_view_pv=100, min_view_users=10, min_ctr=0.5):
    """Compute ranking scores for every video that passes the filters.

    A video is dropped when ``view_pv < min_view_pv``, when
    ``view_users < min_view_users``, or when its smoothed click-through rate
    ``play_pv / (view_pv + 100)`` is not strictly greater than ``min_ctr``.

    Args:
        stats: mapping as returned by :func:`load_video_stats`.
        min_view_pv: minimum number of views required.
        min_view_users: minimum number of viewing users required.
        min_ctr: click-through rate that must be exceeded.

    Returns:
        dict mapping video id to the list
        ``[k_score, share_score * backrate, share_score, backrate, ctr_score,
        view_users, view_pv, play_users, play_pv, share_users, share_pv,
        return_users]``.
    """
    scored = {}
    for vid, counts in stats.items():
        (view_users, view_pv, play_users, play_pv,
         share_users, share_pv, return_users) = counts
        if view_pv < min_view_pv or view_users < min_view_users:
            continue
        ctr_score = float(play_pv) / float(view_pv + PV_SMOOTHING)
        if ctr_score <= min_ctr:
            continue
        backrate = float(return_users) / (float(view_users) + USER_SMOOTHING)
        # k_score uses the same formula as backrate; both slots are kept so
        # the layout of the score list stays unchanged.
        k_score = backrate
        share_score = float(share_pv) / (float(view_pv) + PV_SMOOTHING)
        scored[vid] = [
            k_score, share_score * backrate, share_score, backrate, ctr_score,
            view_users, view_pv, play_users, play_pv,
            share_users, share_pv, return_users,
        ]
    return scored


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: %s <nowdate>" % sys.argv[0])
    nowdate = sys.argv[1]
    redis_helper = RedisHelper()

    # 1. load and aggregate the raw counters
    data_dict = load_video_stats("./data/3_days_video_data_" + nowdate)

    # 2. score, then rank by share_score * backrate (slot 1 of the score list)
    info_dict = score_videos(data_dict)
    sorted_v = sorted(info_dict.items(), key=lambda s: s[1][1], reverse=True)
    print("sorted_v:", sorted_v[:30])

    recall_name = "hot_3day:"
    hot_recall_res = [(vid, info[1]) for vid, info in sorted_v[:30]]

    # 3. publish; the output file is always (re)created, but only filled and
    # pushed to redis when more than 10 videos survived the filters.
    with open("./data/3_days_recall_hot_" + nowdate, 'w') as out:
        if len(hot_recall_res) > 10:
            score_info = json.dumps(hot_recall_res)
            print("score_info:", score_info)
            # Third argument is 15 days in seconds -- presumably the key's
            # expiry; confirm against RedisHelper.set_data_to_redis.
            redis_helper.set_data_to_redis(recall_name, score_info, 60*60*24*15)
            out.write(recall_name + "\t" + score_info + "\n")
|
|
|
|
+
|
|
|
|
+
|