|
@@ -0,0 +1,141 @@
|
|
|
|
+# coding: utf-8
|
|
|
|
+import sys
|
|
|
|
+from operator import itemgetter
|
|
|
|
+import json
|
|
|
|
+
|
|
|
|
def load_share_counts(lines):
    """Aggregate raw share-log lines into per-(user, item) and per-item counts.

    Each line is stripped and split on tabs.  Lines with fewer than three
    fields, a non-integer third field, or an item id of -1 are ignored.
    The second field is used as the user id and the third as the item id.

    Args:
        lines: iterable of text lines (for example an open file object).

    Returns:
        A tuple ``(user_item_counts, item_counts)`` where
        ``user_item_counts`` maps ``(uid, vid)`` to the number of lines seen
        for that pair and ``item_counts`` maps ``vid`` to the number of lines
        seen for that item.  ``vid`` is always an ``int`` in both mappings.
    """
    user_item_counts = {}
    item_counts = {}
    for line in lines:
        fields = line.strip().split("\t")
        if len(fields) < 3:
            continue
        try:
            vid = int(fields[2])
        except ValueError:
            # Non-numeric item id: skip the record.
            continue
        if vid == -1:
            continue
        key = (fields[1], vid)
        user_item_counts[key] = user_item_counts.get(key, 0) + 1
        # Keyed by the integer id so that later lookups (which use integer
        # ids taken from the pair keys) actually find the entry.
        item_counts[vid] = item_counts.get(vid, 0) + 1
    return user_item_counts, item_counts


def count_item_pairs(user_item_counts):
    """Count, for every unordered item pair, how many users have both items.

    Args:
        user_item_counts: mapping of ``(uid, vid)`` to a count; only the keys
            are used.

    Returns:
        A dict mapping ``(smaller_vid, larger_vid)`` to the number of users
        whose item list contains both ids.  Users with fewer than two items
        contribute nothing.
    """
    items_by_user = {}
    for uid, vid in user_item_counts:
        items_by_user.setdefault(uid, []).append(vid)

    pair_counts = {}
    for vids in items_by_user.values():
        # The (uid, vid) keys are unique, so each vid appears once per user.
        for index, first in enumerate(vids):
            for second in vids[index + 1:]:
                # Canonical ordering makes the count independent of the order
                # in which a user's items happen to be listed.
                if first < second:
                    pair_key = (first, second)
                else:
                    pair_key = (second, first)
                pair_counts[pair_key] = pair_counts.get(pair_key, 0) + 1
    return pair_counts


def build_recommendations(pair_counts, item_counts, min_pair_count=3,
                          smoothing=5, min_prob=0.000001):
    """Turn pair co-occurrence counts into per-item candidate lists.

    For every pair whose count is at least ``min_pair_count``, each item of
    the pair is offered as a candidate for the other one with the score
    ``pair_count / (item_counts[candidate] + smoothing)``.  Candidates whose
    score is below ``min_prob`` or whose id is missing from ``item_counts``
    are dropped; the two directions of a pair are evaluated independently.

    Args:
        pair_counts: mapping of ``(vid_a, vid_b)`` to a co-occurrence count.
        item_counts: mapping of ``vid`` to its total count.
        min_pair_count: minimum co-occurrence count for a pair to be kept.
        smoothing: constant added to the candidate's count in the denominator.
        min_prob: minimum score for a candidate to be kept.

    Returns:
        A tuple ``(rec_by_item, kept_pairs)``.  ``rec_by_item`` maps an item
        id to a list of ``(candidate, score, pair_count, candidate_count)``
        tuples; ``kept_pairs`` is the number of pairs that met
        ``min_pair_count``.
    """
    rec_by_item = {}
    kept_pairs = 0
    for pair_key, pair_score in pair_counts.items():
        if pair_score < min_pair_count:
            continue
        kept_pairs += 1
        item_a = int(pair_key[0])
        item_b = int(pair_key[1])
        for seed, candidate in ((item_b, item_a), (item_a, item_b)):
            candidate_count = item_counts.get(candidate)
            if candidate_count is None:
                continue
            score = float(pair_score) / (float(candidate_count) + smoothing)
            if score < min_prob:
                continue
            rec_by_item.setdefault(seed, []).append(
                (candidate, score, pair_score, candidate_count))
    return rec_by_item, kept_pairs


def rank_recommendations(rec_by_item):
    """De-duplicate and sort each item's candidates by descending score.

    Args:
        rec_by_item: mapping of item id to a list of candidate tuples whose
            first element is the candidate id and second element the score.

    Returns:
        A list of ``(item, candidates)`` tuples.  Within ``candidates`` only
        the first occurrence of each candidate id is kept, and the list is
        sorted by score, highest first (ties keep their original order).
    """
    ranked = []
    for seed, candidates in rec_by_item.items():
        seen = set()
        unique = []
        for candidate in candidates:
            if candidate[0] in seen:
                continue
            seen.add(candidate[0])
            unique.append(candidate)
        unique.sort(key=itemgetter(1), reverse=True)
        ranked.append((seed, unique))
    return ranked


def main(argv=None):
    """Read the share log for a date and write item-to-item recommendations.

    Reads ``./data/user_item_share_<date>``, prints the number of distinct
    item pairs and the number of pairs that passed the count threshold, and
    writes the ranked result to ``./data/rec_result_<date>.json``.

    Args:
        argv: argument vector; defaults to ``sys.argv``.  ``argv[1]`` is the
            date suffix used in both file names.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: %s <date>" % argv[0])
    nowdate = argv[1]

    # 1. load data
    with open("./data/user_item_share_" + nowdate) as f:
        user_item_counts, item_counts = load_share_counts(f)

    # 2./3. group items per user and expand them into item pairs
    pair_counts = count_item_pairs(user_item_counts)
    print(len(pair_counts))

    # 4. score candidates for every sufficiently frequent pair
    rec_by_item, kept_pairs = build_recommendations(pair_counts, item_counts)
    print(kept_pairs)

    # 5. sort each candidate list and persist the result
    final_rec_list = rank_recommendations(rec_by_item)
    with open("./data/rec_result_" + nowdate + ".json", "w") as f:
        json.dump(final_rec_list, f)


if __name__ == "__main__":
    main()
|
|
|
|
+
|
|
|
|
+
|