123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
# -*- coding: utf-8 -*-
"""Group the users who clicked a share link by the item that was shared.

Usage: python <script> <nowdate>

Reads two tab-separated files from the data directory:

* ``user_item_share_hour_<nowdate>``: columns 1, 2 and 3 (0-based) are read
  as user id, share id and item id. Lines with fewer than 4 columns are
  skipped.
* ``user_item_click_hour_<nowdate>``: columns 1 and 2 (0-based) are read as
  user id and share id. Lines with fewer than 3 columns are skipped.

Writes ``return_item_hour_<nowdate>`` with one line per item:
``<item id>\\t<JSON list of the user ids that clicked one of its shares>``.
"""
import sys
from operator import itemgetter
import json


def _load_share_items(path):
    """Return ``{share_id: (user_id, item_id)}`` parsed from the share log.

    When a share id appears more than once, the last line wins.
    """
    share_items = {}
    with open(path) as share_file:
        for line in share_file:
            fields = line.strip().split("\t")
            if len(fields) < 4:
                continue
            share_items[fields[2]] = (fields[1], fields[3])
    return share_items


def _group_clickers_by_item(path, share_items):
    """Return ``{item_id: set(user_id)}`` for clicks on known share ids.

    Clicks whose share id is not in ``share_items`` are ignored.
    """
    clickers_by_item = {}
    with open(path) as click_file:
        for line in click_file:
            fields = line.strip().split("\t")
            if len(fields) < 3:
                continue
            share = share_items.get(fields[2])
            if share is None:
                continue
            # share is (sharer uid, item id); only the item id is the key.
            clickers_by_item.setdefault(share[1], set()).add(fields[1])
    return clickers_by_item


def _write_groups(path, clickers_by_item):
    """Write one ``item_id<TAB>json_list_of_user_ids`` line per item."""
    with open(path, "w") as out_file:
        for item_id, user_ids in clickers_by_item.items():
            # Sorted so the output does not depend on set iteration order.
            out_file.write(
                item_id + "\t" + json.dumps(sorted(user_ids)) + "\n")


def main(argv=None, data_dir="./data"):
    """Run the share/click join for the date suffix given in ``argv[1]``.

    ``argv`` defaults to ``sys.argv``; ``data_dir`` is the directory holding
    the input files and receiving the output file. Returns a process exit
    status: 0 on success, 2 when the date argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write("usage: %s <nowdate>\n" % (argv[0] if argv else ""))
        return 2
    nowdate = argv[1]

    # 1. load data
    share_items = _load_share_items(
        data_dir + "/user_item_share_hour_" + nowdate)
    print(len(share_items))

    clickers_by_item = _group_clickers_by_item(
        data_dir + "/user_item_click_hour_" + nowdate, share_items)
    print(len(clickers_by_item))

    _write_groups(data_dir + "/return_item_hour_" + nowdate, clickers_by_item)
    return 0


if __name__ == "__main__":
    sys.exit(main())
|