12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- #coding utf-8
- import sys
- from operator import itemgetter
- import json
def _read_rows(path, min_fields):
    """Yield the tab-separated fields of every usable line in ``path``.

    Each line is stripped of surrounding whitespace and split on tabs.
    Lines that produce fewer than ``min_fields`` columns (blank lines
    included) are skipped. The file is opened in a ``with`` block so the
    handle is released even when reading fails part-way through.
    """
    with open(path) as handle:
        for line in handle:
            fields = line.strip().split("\t")
            if len(fields) >= min_fields:
                yield fields


def _load_shares(path):
    """Read the share log and map every share id to its user/item pair.

    Columns 1, 2 and 3 of each row are used as user id, share id and item
    id; column 0 is ignored.

    Returns a tuple ``(share_to_pair, pair_count)`` where ``share_to_pair``
    maps share id -> ``"uid\\tvid"`` and ``pair_count`` is the number of
    distinct ``"uid\\tvid"`` pairs seen in the file.
    """
    share_to_pair = {}
    pairs = set()
    for fields in _read_rows(path, 4):
        pair = fields[1] + "\t" + fields[3]
        pairs.add(pair)
        # If a share id repeats, the last row wins.
        share_to_pair[fields[2]] = pair
    return share_to_pair, len(pairs)


def _count_clicks(path):
    """Return ``{share_id: number_of_rows}`` for the click log at ``path``.

    Only column 2 (the share id) of each row is used.
    """
    clicks = {}
    for fields in _read_rows(path, 3):
        share_id = fields[2]
        clicks[share_id] = clicks.get(share_id, 0) + 1
    return clicks


def _sum_returns(share_to_pair, clicks):
    """Total the click counts of all share ids belonging to each pair.

    Share ids without any click are ignored, so a pair only appears in the
    result when at least one of its shares was clicked.
    """
    totals = {}
    for share_id, pair in share_to_pair.items():
        hits = clicks.get(share_id)
        if hits is None:
            continue
        # Accumulate per share id: one user may share the same item several
        # times, and every one of those shares contributes to the pair.
        totals[pair] = totals.get(pair, 0) + hits
    return totals


def main(argv):
    """Write per-(user, item) click totals for the date given in ``argv[1]``.

    Reads ``./data/user_item_share_<date>`` and
    ``./data/user_item_click_<date>``, prints the number of distinct
    (user, item) pairs found in the share log, and writes one
    ``uid<TAB>vid<TAB>count`` line per clicked pair to
    ``./data/user_item_return_count_<date>``.

    Unlike the previous version, which kept only the last share id of each
    (user, item) pair, the count covers every share id of the pair.

    Exits with a usage message when the date argument is missing.
    """
    if len(argv) < 2:
        sys.exit("usage: %s <date>" % (argv[0] if argv else "script"))
    nowdate = argv[1]

    # 1. load data
    share_to_pair, pair_count = _load_shares("./data/user_item_share_" + nowdate)
    print(pair_count)
    clicks = _count_clicks("./data/user_item_click_" + nowdate)

    # 2. aggregate clicks per (user, item) and write them out
    totals = _sum_returns(share_to_pair, clicks)
    with open("./data/user_item_return_count_" + nowdate, 'w') as out:
        for pair, total in totals.items():
            out.write(pair + "\t" + str(total) + "\n")


if __name__ == "__main__":
    main(sys.argv)
|