|
@@ -0,0 +1,102 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+import sys
|
|
|
+import json
|
|
|
+import math
|
|
|
+from operator import attrgetter
|
|
|
+
|
|
|
# Rate substituted for returns/exposures when a candidate has no exposures.
_NO_EXPOSURE_RATE = 0.0000000000001
# Weight substituted for log(returns + 1) when a candidate has no returns.
_NO_RETURN_WEIGHT = 0.000000000000001
# Maximum number of ranked candidates written per output line.
_TOP_N = 100


def _count_pairs(path, first_col, second_col):
    """Count how many lines of a tab-separated file share the same key pair.

    Args:
        path: File to read.
        first_col: Index of the field used as the first half of the key.
        second_col: Index of the field used as the second half of the key.

    Returns:
        A dict mapping ``"<first>\\t<second>"`` to the number of lines that
        carried that pair.  Lines with fewer than three fields are skipped.
    """
    counts = {}
    with open(path) as handle:
        for line in handle:
            fields = line.strip().split("\t")
            if len(fields) < 3:
                continue
            key = fields[first_col] + "\t" + fields[second_col]
            # One line is one record: only occurrences are counted, any
            # quantity stored in other columns is ignored.
            counts[key] = counts.get(key, 0) + 1
    return counts


def _rank_candidates(mvid, vid_list, user_list, exp_counts, return_counts):
    """Score the candidates of one main video and return the best ones.

    Args:
        mvid: Identifier of the main video; a candidate equal to it is
            skipped.
        vid_list: Sequence of entries whose first element is the candidate
            id (a string).  Further elements are not used for scoring.
        user_list: Sequence of user ids (strings) paired with each candidate
            to look up counts.
        exp_counts: ``"<uid>\\t<vid>"`` -> exposure count.
        return_counts: ``"<uid>\\t<vid>"`` -> return count.

    Returns:
        Up to ``_TOP_N`` ``(int(candidate_id), score)`` tuples, highest score
        first; ties keep first-seen order.

    Raises:
        ValueError: If a retained candidate id cannot be converted to int.
    """
    totals = {}
    for vid_info in vid_list:
        rec_vid = vid_info[0]
        if rec_vid == mvid:
            continue

        exposures = 0
        returns = 0
        for uid in user_list:
            pair = uid + "\t" + rec_vid
            exposures += exp_counts.get(pair, 0)
            returns += return_counts.get(pair, 0)

        # NOTE: the similarity stored in vid_info[1] is intentionally not
        # read.  It never contributed to the final score, and taking its
        # logarithm aborted the whole run on negative values.
        if exposures > 0:
            rate = float(returns) / float(exposures)
        else:
            rate = _NO_EXPOSURE_RATE
        if returns > 0:
            weight = math.log(returns + 1)
        else:
            weight = _NO_RETURN_WEIGHT

        score = weight * rate
        # Exposed but never returned gives rate == 0: drop the candidate.
        if score <= 0.0:
            continue

        vid_key = int(rec_vid)
        # A candidate listed more than once accumulates its scores.
        totals[vid_key] = totals.get(vid_key, 0) + score

    ranked = sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:_TOP_N]


def main(argv=None):
    """Re-rank candidate lists by the return rate observed for their users.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.
            ``argv[1]``: exposure log, key taken from fields 1 and 2.
            ``argv[2]``: return log, key taken from fields 0 and 1.
            ``argv[3]``: candidates, one ``mvid \\t JSON vid_list \\t JSON
            user_list`` record per line.
            ``argv[4]``: output path, one ``mvid \\t JSON ranking`` line per
            accepted input record.
    """
    if argv is None:
        argv = sys.argv

    exp_counts = _count_pairs(argv[1], 1, 2)
    return_counts = _count_pairs(argv[2], 0, 1)

    # Context managers guarantee both files are closed (and the output is
    # flushed) even when a record fails to parse.
    with open(argv[3]) as rec_file, open(argv[4], 'w') as out_file:
        for line in rec_file:
            fields = line.strip().split("\t")
            if len(fields) < 3:
                continue
            mvid = fields[0]
            vid_list = json.loads(fields[1])
            user_list = json.loads(fields[2])
            ranked = _rank_candidates(
                mvid, vid_list, user_list, exp_counts, return_counts)
            out_file.write(str(mvid) + "\t" + json.dumps(ranked) + "\n")


if __name__ == "__main__":
    main()
|