06_calReturn.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
# coding: utf-8
  2. import sys
  3. from operator import itemgetter
  4. import json
  5. if __name__=="__main__":
  6. #1.load data
  7. nowdate=sys.argv[1]
  8. f1 = open("./data/user_item_share_"+nowdate)
  9. user_share_item_dict={}
  10. user_shareid_dict = {}
  11. while True:
  12. line = f1.readline()
  13. if not line:
  14. break
  15. items = line.strip().split("\t")
  16. if len(items)<4:
  17. continue
  18. #(user, share)
  19. uid = items[1]
  20. shareid = items[2]
  21. vid = items[3]
  22. k_info = uid+"\t"+vid
  23. user_share_item_dict[k_info] = shareid
  24. user_shareid_dict[shareid] = (uid,vid)
  25. f1.close()
  26. print(len(user_share_item_dict))
  27. f2 = open("./data/user_item_click_"+nowdate)
  28. #user_group_dict={}
  29. share_return_dict = {}
  30. #item_dict = {}
  31. while True:
  32. line = f2.readline()
  33. if not line:
  34. break
  35. items = line.strip().split("\t")
  36. if len(items)<3:
  37. continue
  38. #(user, share)
  39. uid = items[1]
  40. shareid = items[2]
  41. #vid = items[3]
  42. if shareid not in share_return_dict:
  43. share_return_dict[shareid] = 1
  44. else:
  45. share_return_dict[shareid] += 1
  46. f2.close()
  47. user_item_return = {}
  48. for k, v in user_share_item_dict.items():
  49. if v in share_return_dict:
  50. if k not in user_item_return:
  51. user_item_return[k] = share_return_dict[v]
  52. else:
  53. user_item_return[k] += share_return_dict[v]
  54. f3 = open("./data/user_item_return_count_"+nowdate, 'w')
  55. for k, v in user_item_return.items():
  56. f3.write(k+"\t"+str(v)+"\n")
  57. f3.close()
  58. #((user,item), score)
  59. #print(user_item_dict)
  60. #2. (uid, [(vid, score)....])