# -*- coding: utf-8 -*-
"""Build item-to-item recommendations from a user/item share log.

Usage: run with one argument, the date suffix ``nowdate``.

Input  : ./data/user_item_share_<nowdate>
         Tab-separated text; lines with fewer than three fields are skipped.
         Field 1 (0-based) is the user id, field 2 is the item id.  Field 0
         is not used by this script.
Output : ./data/rec_result2_<nowdate>.json
         A JSON list of ``[item, [[candidate, score, pair_count,
         candidate_share_count], ...]]`` entries, candidates sorted by
         descending score.
"""
import sys
from operator import itemgetter
import json
from itertools import combinations

# Pairs shared together by fewer users than this are discarded.
MIN_PAIR_COUNT = 2
# Added to the candidate's share count in the score denominator so that
# rarely shared items do not get inflated scores.
SMOOTHING = 5


def count_shares(lines):
    """Count share records per (user, item) and per item.

    ``lines`` is any iterable of raw text lines.  Returns a pair of plain
    dicts ``(user_item_counts, item_counts)``:
    ``user_item_counts[(user, item)]`` is the number of records for that
    user/item combination and ``item_counts[item]`` is the total number of
    records for that item (repeat shares by one user are all counted).
    """
    user_item_counts = {}
    item_counts = {}
    for line in lines:
        fields = line.strip().split("\t")
        if len(fields) < 3:
            continue
        user, item = fields[1], fields[2]
        pair = (user, item)
        user_item_counts[pair] = user_item_counts.get(pair, 0) + 1
        item_counts[item] = item_counts.get(item, 0) + 1
    return user_item_counts, item_counts


def group_items_by_user(user_item_counts):
    """Return ``{user: [item, ...]}`` built from the (user, item) keys."""
    items_by_user = {}
    for user, item in user_item_counts:
        items_by_user.setdefault(user, []).append(item)
    return items_by_user


def count_item_pairs(items_by_user):
    """Count, for every unordered item pair, the users who shared both.

    Keys of the returned dict are ``(smaller, larger)`` tuples.  Using one
    canonical key per pair keeps the count independent of the order in
    which a user's items were seen; users with fewer than two distinct
    items contribute nothing.
    """
    pair_counts = {}
    for items in items_by_user.values():
        # sorted(set(...)) removes repeats and makes combinations() emit
        # each pair exactly once, already in canonical order.
        for pair in combinations(sorted(set(items)), 2):
            pair_counts[pair] = pair_counts.get(pair, 0) + 1
    return pair_counts


def build_candidates(pair_counts, item_counts,
                     min_pair_count=MIN_PAIR_COUNT, smoothing=SMOOTHING):
    """Turn pair counts into per-item candidate lists.

    For every pair kept (count >= ``min_pair_count``) each item becomes a
    candidate for the other, with
    ``score = pair_count / (candidate_share_count + smoothing)``.

    Returns ``(candidates, kept)`` where ``candidates`` maps an item to a
    list of ``(candidate, score, pair_count, candidate_share_count)``
    tuples and ``kept`` is the number of pairs that passed the threshold.
    """
    candidates = {}
    kept = 0
    for (item_a, item_b), pair_count in pair_counts.items():
        if pair_count < min_pair_count:
            continue
        kept += 1
        for target, candidate in ((item_b, item_a), (item_a, item_b)):
            if candidate not in item_counts:
                continue
            share_count = item_counts[candidate]
            score = pair_count / (share_count + smoothing)
            candidates.setdefault(target, []).append(
                (candidate, score, pair_count, share_count)
            )
    return candidates, kept


def rank_candidates(candidates):
    """Return ``[(item, candidates sorted by descending score), ...]``."""
    return [
        (item, sorted(recs, key=itemgetter(1), reverse=True))
        for item, recs in candidates.items()
    ]


def main(argv=None):
    """Run the full pipeline for the date given as the first argument."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: {} <nowdate>".format(argv[0] if argv else "script"))
    nowdate = argv[1]

    # 1. load data
    with open("./data/user_item_share_" + nowdate) as f:
        user_item_counts, item_counts = count_shares(f)

    # 2. (uid, [vid, ...])
    items_by_user = group_items_by_user(user_item_counts)

    # 3. expand item pairs
    pair_counts = count_item_pairs(items_by_user)
    print(pair_counts)
    print(item_counts)

    # 4. rec item
    candidates, kept = build_candidates(pair_counts, item_counts)
    print(kept)

    # 5. sorted item_list
    final_rec_list = rank_candidates(candidates)
    with open("./data/rec_result2_" + nowdate + ".json", "w") as f:
        json.dump(final_rec_list, f)


if __name__ == "__main__":
    main()