# -*- coding: utf-8 -*-
"""Count how many clicks each user's shares of an item brought back.

Usage: python <this script> <nowdate>

Inputs (tab-separated text, under ``./data`` by default):
  * ``user_item_share_<nowdate>``: rows with at least 4 fields; field 1 is
    read as the user id, field 2 as the share id, field 3 as the item id.
    Field 0 is not used.
  * ``user_item_click_<nowdate>``: rows with at least 3 fields; field 2 is
    read as the share id that was clicked. Other fields are not used.

Output:
  * ``user_item_return_count_<nowdate>``: one ``uid<TAB>vid<TAB>count`` row
    for every (user, item) pair whose shares received at least one click.
"""
import os
import sys
from collections import Counter
from operator import itemgetter
import json


def _read_rows(path, min_fields):
    """Yield the tab-split fields of every row in *path* that is long enough.

    Each line is stripped of surrounding whitespace before splitting; rows
    with fewer than *min_fields* fields are silently skipped.
    """
    with open(path) as handle:
        for line in handle:
            fields = line.strip().split("\t")
            if len(fields) >= min_fields:
                yield fields


def main(nowdate, data_dir="./data"):
    """Build and write the per-(user, item) return counts for *nowdate*.

    Args:
        nowdate: Suffix appended to the input and output file names.
        data_dir: Directory holding the input files and receiving the output.

    Returns:
        A dict mapping ``(uid, vid)`` to the total number of click rows whose
        share id belongs to that pair. Pairs without clicks are omitted.

    Raises:
        IOError/OSError: If an input file cannot be opened or the output
            file cannot be written.
    """
    share_path = os.path.join(data_dir, "user_item_share_" + nowdate)
    click_path = os.path.join(data_dir, "user_item_click_" + nowdate)
    out_path = os.path.join(data_dir, "user_item_return_count_" + nowdate)

    # 1. Load data.
    # Map every share id to the (user, item) pair it belongs to. Keying by
    # share id (not by pair) keeps all shares of the same item by the same
    # user, so their clicks can be summed below. If a share id repeats, the
    # last row wins.
    share_owner = {}
    shared_pairs = set()
    for fields in _read_rows(share_path, 4):
        uid, shareid, vid = fields[1], fields[2], fields[3]
        share_owner[shareid] = (uid, vid)
        shared_pairs.add((uid, vid))
    # Progress output: number of distinct (user, item) pairs that were shared.
    print(len(shared_pairs))

    # Number of click rows seen per share id.
    click_counts = Counter(
        fields[2] for fields in _read_rows(click_path, 3)
    )

    # Sum the clicks of every share id onto its (user, item) pair. Iterating
    # share ids, rather than one share id per pair, is what makes the
    # accumulation effective when a pair has several share ids.
    return_counts = Counter()
    for shareid, pair in share_owner.items():
        if shareid in click_counts:
            return_counts[pair] += click_counts[shareid]

    # Each output row is ((user, item), score) flattened to three columns.
    with open(out_path, "w") as out_file:
        for (uid, vid), count in return_counts.items():
            out_file.write(uid + "\t" + vid + "\t" + str(count) + "\n")

    # NOTE(review): the original comments sketch a second step that groups
    # rows as (uid, [(vid, score), ...]); it is not implemented here.
    return dict(return_counts)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: %s <nowdate>" % sys.argv[0])
    main(sys.argv[1])