# -*- coding: utf-8 -*-
"""Group returning (click-back) users by the shared item that brought them.

Usage: run as a script with one argument, ``nowdate``, which is used as the
suffix of every data file name.

Inputs (tab-separated text):
  ./data/user_item_share_<nowdate>  columns 1..3 are read as uid, shareid, vid
  ./data/user_item_click_<nowdate>  columns 1..2 are read as uid, shareid

Column 0 is ignored in both files; its meaning is not visible from this
script.

Output:
  ./data/return_item_<nowdate>  one line per item: ``vid<TAB>JSON list of uids``

NOTE(review): files are opened with the platform default encoding, exactly as
before; confirm this matches how the data files are produced.
"""
import sys
from operator import itemgetter
import json


def _load_share_index(path):
    """Return a dict mapping shareid -> vid read from the share log at *path*.

    Lines with fewer than 4 tab-separated fields are skipped.  When a shareid
    occurs more than once, the last occurrence wins.
    """
    share_to_item = {}
    with open(path) as share_file:
        for line in share_file:
            fields = line.strip().split("\t")
            if len(fields) < 4:
                continue
            # fields[1] is the sharing user's id; nothing downstream reads it,
            # so only the shared item id is kept.
            share_to_item[fields[2]] = fields[3]
    return share_to_item


def _group_clicks_by_item(path, share_to_item):
    """Return a dict mapping vid -> set of uids that clicked a share of it.

    Lines with fewer than 3 tab-separated fields are skipped, as are clicks
    whose shareid is not present in *share_to_item*.
    """
    item_groups = {}
    with open(path) as click_file:
        for line in click_file:
            fields = line.strip().split("\t")
            if len(fields) < 3:
                continue
            uid = fields[1]
            shareid = fields[2]
            # Only clicks that trace back to a known share count as a return.
            # (The original note says the share log covers the last 2 days;
            # that window is not enforced or verifiable here.)
            if shareid in share_to_item:
                item_groups.setdefault(share_to_item[shareid], set()).add(uid)
    return item_groups


def _write_item_groups(path, item_groups):
    """Write one ``vid<TAB>JSON uid list`` line per entry of *item_groups*."""
    with open(path, 'w') as out_file:
        for vid, uids in item_groups.items():
            # Sets have no stable order; sort so repeated runs over the same
            # input produce byte-identical output.
            out_file.write(vid + "\t" + json.dumps(sorted(uids)) + "\n")


def main(nowdate):
    """Build ./data/return_item_<nowdate> from the share and click logs.

    Prints the number of distinct share ids loaded, then the number of items
    that received at least one returning click.
    """
    # 1. load data
    share_to_item = _load_share_index("./data/user_item_share_" + nowdate)
    print(len(share_to_item))

    item_groups = _group_clicks_by_item(
        "./data/user_item_click_" + nowdate, share_to_item)
    print(len(item_groups))

    _write_item_groups("./data/return_item_" + nowdate, item_groups)
    # NOTE(review): the original trailing comments sketched a second step,
    # (uid, [(vid, score), ...]); it was never implemented in this script.


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " <nowdate>")
    main(sys.argv[1])