app_item2vec_0316.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. # encoding: utf-8
  2. import base64
  3. import json
  4. # from meinheld import server
  5. import flask
  6. from flask import request, Flask
  7. from flask import Flask
  8. from embedding_manager import EmbeddingManager
  9. from embedding_manager_user import EmbeddingManagerUser
  10. import time
  11. import logging
  12. from logging.handlers import TimedRotatingFileHandler
# WSGI application object; the route decorators in this file register on it.
app = Flask(__name__)
  14. def setLog():
  15. log_fmt = '%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s'
  16. formatter = logging.Formatter(log_fmt)
  17. fh = TimedRotatingFileHandler(filename="log/run_faiss_item2vec_server" + str(time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())) + ".log", when="H", interval=1,
  18. backupCount=72)
  19. fh.setFormatter(formatter)
  20. logging.basicConfig(level=logging.INFO)
  21. log = logging.getLogger()
  22. log.addHandler(fh)
  23. setLog()
  24. print("load user embedding")
  25. mgr_user_embedding = EmbeddingManagerUser("/work/xielixun/item2vec-java/tzld_videoids_users_embedding-210419-app.csv",
  26. "userId", "vector")
  27. print("load video embedding")
  28. mgr_video_embedding = EmbeddingManager(
  29. # "/root/xielixun/ant-learn-recsys/datas/tzld_video_embedding-1106-sort.csv",
  30. #"/root/xielixun/ant-learn-recsys/datas/tzld_video_embedding-210222-sort.csv",
  31. "/work/xielixun/item2vec-java/tzld_video_embedding-210419-sort.csv",
  32. "word", "vector")
  33. """
  34. 健康检查
  35. """
  36. @app.route("/healthcheck", methods=['GET'])
  37. def index_health_check():
  38. logging.info("I'm ok")
  39. return "ok"
  40. # 定义路由
  41. @app.route("/ai/v1/user2video", methods=['POST'])
  42. def get_video_by_user_mid2vid():
  43. try:
  44. start_time = time.time()
  45. resParm = flask.request.data
  46. # 转字符串
  47. resParm = str(resParm, encoding="utf-8")
  48. resParm = eval(resParm)
  49. requestId = resParm.get('requestId')
  50. # 服务鉴权
  51. token = resParm.get('token')
  52. if not token:
  53. res = {'code': 3, 'msg': 'token fail'}
  54. logging.error("code: 3 msg: token fail ")
  55. return json.dumps(res)
  56. # 按照debase64进行处理
  57. mid = resParm.get("mid")
  58. vid = resParm.get("vid")
  59. page_size = resParm.get("pageSize")
  60. # 1. 获取该用户的embedding
  61. user_embedding_str = mgr_user_embedding.get_embedding(mid)
  62. user_str = "["
  63. target_video_ids = list()
  64. if user_embedding_str != "":
  65. user_list = user_embedding_str[1:-1].strip('\n').split()
  66. for idx, emb in enumerate(user_list):
  67. if idx < 63:
  68. user_str += emb + ","
  69. else:
  70. user_str += emb + "]"
  71. # 2. 获取该用户看过的视频ID列表
  72. # watch_ids = obj_user_rating.get_user_watched_ids(user_id)
  73. # 3. 使用近邻搜索获取用户可能喜欢的视频ID列表
  74. target_video_ids = mgr_video_embedding.search_ids_by_embedding(user_str,
  75. page_size)
  76. timeUsed = time.time() - start_time
  77. data = {'requestId': requestId, 'videoIds': str(target_video_ids), 'timeUsed': timeUsed, 'mid': mid}
  78. res = {'code': 0, 'msg': 'success', 'data': data}
  79. logging.info(f"code:0 msg:success user2video cost Time is: {str(timeUsed)} ")
  80. return json.dumps(res)
  81. except Exception as x:
  82. logging.exception(x)
  83. res = {'code': 6, 'msg': 'request exception', 'data': {}, 'mid': mid}
  84. return json.dumps(res)
  85. # 定义路由
  86. @app.route("/ai/v1/video2video", methods=['POST'])
  87. def get_video_by_video_vid2vid():
  88. try:
  89. start_time = time.time()
  90. resParm = flask.request.data
  91. # 转字符串
  92. resParm = str(resParm, encoding="utf-8")
  93. resParm = eval(resParm)
  94. requestId = resParm.get('requestId')
  95. # 服务鉴权
  96. token = resParm.get('token')
  97. if not token:
  98. res = {'code': 3, 'msg': 'token fail'}
  99. logging.error("code: 3 msg: token fail ")
  100. return json.dumps(res)
  101. # 按照debase64进行处理
  102. # mid = resParm.get("mid")
  103. vid = resParm.get("vid")
  104. page_size = resParm.get("pageSize")
  105. video_ids = list()
  106. # 查询自己的embedding
  107. video_embedding = mgr_video_embedding.get_embedding(vid)
  108. if video_embedding != "":
  109. # 查询相似的视频
  110. video_ids = mgr_video_embedding.search_ids_by_embedding(video_embedding, page_size)
  111. timeUsed = time.time() - start_time
  112. data = {'requestId': requestId, 'videoIds': str(video_ids), 'timeUsed': timeUsed, 'vid': vid}
  113. res = {'code': 0, 'msg': 'success', 'data': data}
  114. logging.info(f"code:0 msg:success video2video cost Time is: {str(timeUsed)} ")
  115. return json.dumps(res)
  116. except Exception as x:
  117. logging.exception(x)
  118. res = {'code': 6, 'msg': 'request exception', 'data': {}, 'vid': vid}
  119. return json.dumps(res)
  120. @app.route("/ai/v1/videolist2video", methods=['POST'])
  121. def get_video_by_video_vidList2vid():
  122. try:
  123. start_time = time.time()
  124. resParm = flask.request.data
  125. # 转字符串
  126. resParm = str(resParm, encoding="utf-8")
  127. resParm = eval(resParm)
  128. requestId = resParm.get('requestId')
  129. # 服务鉴权
  130. token = resParm.get('token')
  131. if not token:
  132. res = {'code': 3, 'msg': 'token fail'}
  133. logging.error("code: 3 msg: token fail ")
  134. return json.dumps(res)
  135. # 按照debase64进行处理
  136. # mid = resParm.get("mid")
  137. vid_str = resParm.get("vidList")
  138. vid_list = vid_str.split(",")
  139. page_size = resParm.get("pageSize")
  140. video_embedding_list = list()
  141. # 查询自己的embedding
  142. for vid in vid_list:
  143. video_embedding = mgr_video_embedding.get_embedding(vid)
  144. if video_embedding == "":
  145. continue
  146. video_embedding_list.append(json.loads(video_embedding))
  147. # 查询相似的视频
  148. video_ids = list()
  149. if len(video_embedding_list) > 0:
  150. video_ids = mgr_video_embedding.search_ids_by_embedding_list(video_embedding_list, page_size)
  151. timeUsed = time.time() - start_time
  152. data = {'requestId': requestId, 'videoIds': str(video_ids), 'timeUsed': timeUsed, 'vid': vid}
  153. res = {'code': 0, 'msg': 'success', 'data': data}
  154. logging.info(f"code:0 msg:success Faiss videolist2video cost Time is: {str(timeUsed)} ")
  155. return json.dumps(res)
  156. except Exception as x:
  157. logging.exception(x)
  158. res = {'code': 6, 'msg': 'request exception', 'data': {}, 'vid': vid}
  159. return json.dumps(res)
  160. if __name__ == '__main__':
  161. # 启动服务
  162. # app.run(host="0.0.0.0", port=9996) # test
  163. #app.run(host="0.0.0.0", port=9997) # dssm
  164. app.run(host="0.0.0.0", port=9999) # item2vec