123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102 |
# -*- coding: utf-8 -*-
"""Re-score per-item recommendation candidates from exposure/return logs.

Usage:
    python SCRIPT EXPOSURE_LOG RETURN_LOG CANDIDATES OUTPUT

All inputs are tab-separated text; lines with fewer than three fields
(after stripping surrounding whitespace) are skipped.

* EXPOSURE_LOG: fields 1 and 2 (0-based) are joined into a
  ``"<id>\\t<vid>"`` key and each line counts as one exposure.
* RETURN_LOG: fields 0 and 1 are joined into the same kind of key and each
  line counts as one return.
  NOTE(review): field 2 looks like a return count but has never been used;
  every line counts as exactly 1 -- confirm this is intended.
* CANDIDATES: field 0 is the source item id, field 1 a JSON list whose
  entries start with a candidate vid (a string convertible to ``int``),
  field 2 a JSON list of user ids (strings).
  NOTE(review): the second element of each candidate entry (presumably a
  similarity score) is not part of the final score.  The previous code took
  ``math.log`` of it without using the result, which crashed on negative
  values; that dead computation has been removed.
* OUTPUT: one line per accepted CANDIDATES line:
  ``<source id>\\t<JSON list of [vid, score], best first, at most TOP_N>``.
"""
import sys
import json
import math
from collections import Counter
from operator import attrgetter, itemgetter

# Minimum number of tab-separated fields a line needs to be processed.
MIN_FIELDS = 3
# Maximum number of ranked candidates written per source item.
TOP_N = 100
# Return rate used when a candidate has no recorded exposure at all.
NO_EXPOSURE_RATE = 0.0000000000001
# Return weight used when a candidate has no recorded return at all.
NO_RETURN_WEIGHT = 0.000000000000001


def _split_fields(line):
    """Return the tab-separated fields of *line*, or None if it is too short."""
    fields = line.strip().split("\t")
    if len(fields) < MIN_FIELDS:
        return None
    return fields


def _count_pairs(path, first_col, second_col):
    """Count the lines of *path* per ``"<first>\\t<second>"`` column pair."""
    counts = Counter()
    with open(path) as handle:
        for line in handle:
            fields = _split_fields(line)
            if fields is None:
                continue
            counts[fields[first_col] + "\t" + fields[second_col]] += 1
    return counts


def _score_candidate(rec_vid, user_ids, exposure_counts, return_counts):
    """Return ``return_weight * return_rate`` for one candidate vid.

    Exposures and returns are summed over every ``"<uid>\\t<rec_vid>"`` key.
    The rate is returns/exposures (a tiny constant when there was no
    exposure); the weight is ``log(returns + 1)`` (a tiny constant when
    there was no return).  A candidate that was exposed but never returned
    therefore scores exactly 0.
    """
    exposures = 0
    returns = 0
    for uid in user_ids:
        pair_key = uid + "\t" + rec_vid
        exposures += exposure_counts.get(pair_key, 0)
        returns += return_counts.get(pair_key, 0)

    if exposures > 0:
        return_rate = float(returns) / float(exposures)
    else:
        return_rate = NO_EXPOSURE_RATE

    if returns > 0:
        return_weight = math.log(returns + 1)
    else:
        return_weight = NO_RETURN_WEIGHT

    return return_weight * return_rate


def _rank_candidates(source_vid, vid_list, user_ids, exposure_counts,
                     return_counts, top_n=TOP_N):
    """Return up to *top_n* ``(vid, score)`` pairs, highest score first.

    The source item itself and candidates whose score is not positive are
    dropped.  Scores of entries that map to the same integer vid are summed.
    """
    scores = {}
    for vid_info in vid_list:
        rec_vid = vid_info[0]
        if rec_vid == source_vid:
            continue
        score = _score_candidate(rec_vid, user_ids, exposure_counts,
                                 return_counts)
        if score <= 0.0:
            continue
        vid_key = int(rec_vid)
        scores[vid_key] = scores.get(vid_key, 0.0) + score
    ranked = sorted(scores.items(), key=itemgetter(1), reverse=True)
    return ranked[:top_n]


def main(argv):
    """Run the re-scoring pipeline.

    Args:
        argv: argument list shaped like ``sys.argv``: program name followed
            by the exposure log, return log, candidates and output paths.

    Returns:
        0 on success, 2 when too few arguments were supplied.
    """
    if len(argv) < 5:
        sys.stderr.write(
            "usage: %s EXPOSURE_LOG RETURN_LOG CANDIDATES OUTPUT\n"
            % (argv[0] if argv else "script")
        )
        return 2

    exposure_counts = _count_pairs(argv[1], 1, 2)
    return_counts = _count_pairs(argv[2], 0, 1)

    # Context managers guarantee both files are closed even if a line
    # fails to parse part-way through.
    with open(argv[3]) as candidates, open(argv[4], "w") as output:
        for line in candidates:
            fields = _split_fields(line)
            if fields is None:
                continue
            source_vid = fields[0]
            vid_list = json.loads(fields[1])
            user_ids = json.loads(fields[2])
            ranked = _rank_candidates(source_vid, vid_list, user_ids,
                                      exposure_counts, return_counts)
            output.write(str(source_vid) + "\t" + json.dumps(ranked) + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|