# coding: utf-8
"""Rank candidate videos for each seed video using per-user return counts.

Usage:
    python <script> EXP_FILE RETURN_FILE CANDIDATE_FILE OUTPUT_FILE

Inputs (all tab-separated text, lines with fewer than 3 fields are skipped):

* EXP_FILE       -- fields 1 and 2 (0-based) are read as ``mid`` and ``vid``;
                    each line adds one to the count of that pair.
                    (presumably an exposure/impression log -- confirm with
                    the producer of this file)
* RETURN_FILE    -- fields 0 and 1 are read as ``uid`` and ``vid``; each line
                    adds one to the count of that pair.  A third field must be
                    present but its value is not used.
* CANDIDATE_FILE -- field 0 is the seed video id, field 1 is a JSON list of
                    candidate entries whose first element is the candidate
                    video id, field 2 is a JSON list of user ids.

Output: one line per accepted candidate line, formatted as
``<seed id>\\t<JSON list of [candidate id, score]>``, sorted by descending
score and truncated to ``TOP_K`` entries.
"""
import sys
import json
import math
from operator import attrgetter

# Maximum number of ranked candidates written for each seed video.
TOP_K = 100

# Tiny positive stand-ins used when a candidate has no counted exposures or
# no counted returns, so that such candidates still get a (very small)
# positive score instead of being dropped.
_NO_EXPOSURE_RATIO = 0.0000000000001
_NO_RETURN_WEIGHT = 0.000000000000001


def _count_pairs(path, first_col, second_col):
    """Count how often each (first_col, second_col) pair occurs in a TSV file.

    Returns a dict keyed by ``"<first>\\t<second>"``.  Lines with fewer than
    three tab-separated fields are ignored.
    """
    counts = {}
    with open(path) as handle:
        for line in handle:
            # NOTE: strip() also removes leading/trailing tabs, so a line with
            # an empty first or last field loses that field before splitting.
            fields = line.strip().split("\t")
            if len(fields) < 3:
                continue
            key = fields[first_col] + "\t" + fields[second_col]
            counts[key] = counts.get(key, 0) + 1
    return counts


def _score(exp_total, return_total):
    """Combine exposure and return totals into a single ranking score.

    The score is ``weight * ratio`` where ``ratio`` is returns/exposures (or a
    tiny constant when there are no exposures) and ``weight`` is
    ``log(returns + 1)`` (or a tiny constant when there are no returns).
    """
    if exp_total > 0:
        ratio = float(return_total) / float(exp_total)
    else:
        ratio = _NO_EXPOSURE_RATIO
    if return_total > 0:
        weight = math.log(return_total + 1)
    else:
        weight = _NO_RETURN_WEIGHT
    return weight * ratio


def _rank_candidates(seed_vid, candidates, users, exp_counts, return_counts):
    """Score every candidate of one seed video and return them best-first.

    Returns a list of ``(int(candidate id), score)`` tuples sorted by
    descending score.  The seed itself and candidates whose score is not
    strictly positive are left out; repeated candidate ids have their scores
    summed.
    """
    scores = {}
    for candidate in candidates:
        vid = candidate[0]
        if vid == seed_vid:
            continue
        # The remaining elements of the candidate entry (e.g. a similarity
        # value) do not take part in the score, so they are deliberately not
        # parsed: a malformed or negative value must not abort the run.
        exp_total = 0
        return_total = 0
        for uid in users:
            key = uid + "\t" + vid
            exp_total += exp_counts.get(key, 0)
            return_total += return_counts.get(key, 0)
        score = _score(exp_total, return_total)
        if score <= 0.0:
            # Happens when there were exposures but no returns (ratio == 0).
            continue
        vid_key = int(vid)
        if vid_key in scores:
            scores[vid_key] += score
        else:
            scores[vid_key] = score
    return sorted(scores.items(), key=lambda pair: pair[1], reverse=True)


def main(argv=None):
    """Run the ranking job.

    ``argv`` follows the ``sys.argv`` layout (program name first, then the
    four file paths described in the module docstring); it defaults to
    ``sys.argv``.  Exits with a usage message when paths are missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 5:
        prog = argv[0] if argv else "script"
        sys.exit("usage: %s EXP_FILE RETURN_FILE CANDIDATE_FILE OUTPUT_FILE"
                 % prog)

    exp_counts = _count_pairs(argv[1], 1, 2)
    return_counts = _count_pairs(argv[2], 0, 1)

    # Context managers guarantee both files are closed even if a line fails
    # to parse; the input is opened first so a missing input does not leave
    # an empty output file behind.
    with open(argv[3]) as candidates_in, open(argv[4], "w") as ranked_out:
        for line in candidates_in:
            fields = line.strip().split("\t")
            if len(fields) < 3:
                continue
            seed_vid = fields[0]
            candidates = json.loads(fields[1])
            users = json.loads(fields[2])
            ranked = _rank_candidates(
                seed_vid, candidates, users, exp_counts, return_counts
            )
            ranked_out.write(
                str(seed_vid) + "\t" + json.dumps(ranked[:TOP_K]) + "\n"
            )


if __name__ == "__main__":
    main()