# 07_getI2ICTRGroup.py (3.2 KB)
  # -*- coding: utf-8 -*-
"""Re-rank item-to-item recommendation candidates by a return/exposure ratio.

Usage:
    script.py EXPOSURE_TSV RETURN_TSV CANDIDATE_TSV OUTPUT_TSV

Inputs (tab-separated; lines with fewer than three columns are skipped):

* EXPOSURE_TSV  -- columns 1 and 2 are read as an (id, vid) pair and the
  number of lines per pair is counted.  Column 0 is ignored.
* RETURN_TSV    -- columns 0 and 1 are read as a (uid, vid) pair and the
  number of lines per pair is counted.  Column 2 is not used.
* CANDIDATE_TSV -- column 0 is the source vid, column 1 a JSON list of
  [vid, score] entries, column 2 a JSON list of user ids.

Output: one line per accepted candidate line, "<source vid>\t<json>", where
<json> is a list of [int(vid), score] pairs sorted by descending score and
truncated to the best 100.

NOTE(review): the indentation of the script this was restored from had been
lost, so the nesting was reconstructed from the control flow.  In particular
the return-count lookup is treated as independent of the exposure lookup --
confirm against the original.
NOTE(review): the exposure file is keyed on columns 1/2 while the return file
is keyed on columns 0/1; presumably both first fields are user ids, since
both tables are probed with "<uid>\t<vid>" -- confirm against the data.
"""
import sys
import json
import math
from operator import attrgetter

# Lines with fewer tab-separated columns than this are ignored in every input.
_MIN_COLUMNS = 3
# Ratio used when none of the users has an exposure count for the candidate.
_NO_EXPOSURE_RATIO = 0.0000000000001
# Weight used in place of log(score) when the candidate score is exactly 0.
_ZERO_SCORE_WEIGHT = 0.000000000000001
# Maximum number of recommendations written per source item.
_MAX_RECS = 100


def _count_pairs(path, first_col, second_col):
    """Count lines per "<first>\\t<second>" key built from two TSV columns."""
    counts = {}
    with open(path) as handle:
        for line in handle:
            fields = line.strip().split("\t")
            if len(fields) < _MIN_COLUMNS:
                continue
            key = fields[first_col] + "\t" + fields[second_col]
            counts[key] = counts.get(key, 0) + 1
    return counts


def _rank_candidates(source_vid, candidates, users, exposures, returns):
    """Score the candidates of one source item and return the best ones.

    Each candidate's score is weight * ratio, where weight is log(score)
    (or a tiny positive constant when the score is exactly 0) and ratio is
    the summed return count divided by the summed exposure count over
    ``users`` (or a tiny positive constant when there is no exposure).
    Candidates equal to ``source_vid`` and candidates whose final score is
    not strictly positive are dropped.

    Returns a list of (int(vid), score) tuples, best first, at most
    ``_MAX_RECS`` long.
    """
    totals = {}
    for candidate in candidates:
        rec_vid = candidate[0]
        if rec_vid == source_vid:
            continue
        raw_score = float(candidate[1])
        if raw_score < 0:
            # math.log() raises ValueError on negative input, which used to
            # abort the whole run.  Scores in (0, 1] already yield a
            # non-positive product and are dropped below, so negative scores
            # are dropped the same way.
            continue

        exposure_total = 0
        return_total = 0
        for uid in users:
            key = uid + "\t" + rec_vid
            exposure_total += exposures.get(key, 0)
            return_total += returns.get(key, 0)

        if exposure_total > 0:
            ratio = float(return_total) / float(exposure_total)
        else:
            ratio = _NO_EXPOSURE_RATIO

        if raw_score == 0:
            weight = _ZERO_SCORE_WEIGHT
        else:
            weight = math.log(raw_score)

        score = weight * ratio
        if score <= 0.0:
            continue

        # Ids are emitted as integers; textual variants of the same number
        # therefore accumulate into one entry.
        item_id = int(rec_vid)
        totals[item_id] = totals.get(item_id, 0.0) + score

    ranked = sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:_MAX_RECS]


def main(argv=None):
    """Run the re-ranking; ``argv`` defaults to ``sys.argv``.

    Returns 0 on success and 2 when fewer than four paths are supplied.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 5:
        sys.stderr.write(
            "usage: %s EXPOSURE_TSV RETURN_TSV CANDIDATE_TSV OUTPUT_TSV\n"
            % (args[0] if args else "07_getI2ICTRGroup.py")
        )
        return 2

    exposures = _count_pairs(args[1], 1, 2)
    returns = _count_pairs(args[2], 0, 1)

    # Both handles are context-managed so they are closed even if a malformed
    # line raises part-way through.
    with open(args[3]) as candidate_file:
        with open(args[4], "w") as output_file:
            for line in candidate_file:
                fields = line.strip().split("\t")
                if len(fields) < _MIN_COLUMNS:
                    continue
                source_vid = fields[0]
                candidates = json.loads(fields[1])
                users = json.loads(fields[2])
                ranked = _rank_candidates(
                    source_vid, candidates, users, exposures, returns
                )
                output_file.write(
                    str(source_vid) + "\t" + json.dumps(ranked) + "\n"
                )
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))