common.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. """
  2. @author: luojunhui
  3. """
  4. import os
  5. import json
  6. import uuid
  7. import requests
  8. import urllib.parse
  9. from applications.functions.auto_white import auto_white
  10. from applications.functions.mysql import select, select_sensitive_words
  11. from applications.functions.ask_kimi import ask_kimi
  12. from applications.log import logging
  13. def sensitive_flag(title):
  14. """
  15. 判断标题是否命中过滤词
  16. :param title:
  17. :return:
  18. """
  19. sensitive_words = select_sensitive_words()
  20. for word in sensitive_words:
  21. if word in title:
  22. # title = title.replace(word, "*")
  23. return False
  24. return True
  25. def ask_kimi_and_save_to_local(info_tuple):
  26. """
  27. save file to local
  28. :return:
  29. """
  30. title, trace_id, save_path = info_tuple[0], info_tuple[1], info_tuple[2]
  31. if os.path.exists(save_path):
  32. logging(
  33. code="2001",
  34. info="该 video 信息已经挖掘完成---{}".format(title),
  35. function="ask_kimi_and_save_to_local",
  36. trace_id=trace_id,
  37. )
  38. else:
  39. os.makedirs(os.path.dirname(save_path), exist_ok=True)
  40. if not title:
  41. result = {}
  42. else:
  43. result = ask_kimi(title)
  44. logging(
  45. code="2001",
  46. info="kimi-result",
  47. data=result,
  48. trace_id=trace_id,
  49. function="ask_kimi_and_save_to_local"
  50. )
  51. with open(save_path, "w", encoding="utf-8") as f:
  52. f.write(json.dumps(result, ensure_ascii=False))
  53. def create_gzh_path(video_id, shared_uid):
  54. """
  55. :param video_id: 视频 id
  56. :param shared_uid: 分享 id
  57. """
  58. root_share_id = str(uuid.uuid4())
  59. url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
  60. # 自动把 root_share_id 加入到白名单
  61. auto_white(root_share_id)
  62. return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url, safe='')}"
  63. def request_for_info(video_id):
  64. """
  65. 请求数据
  66. :param video_id:
  67. :return:
  68. """
  69. url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
  70. data = {
  71. "videoIdList": [video_id]
  72. }
  73. header = {
  74. "Content-Type": "application/json",
  75. }
  76. response = requests.post(url, headers=header, data=json.dumps(data))
  77. return response.json()
  78. def choose_video(result):
  79. """
  80. :param result: 计算出来的结果
  81. :return: uid, video_id
  82. """
  83. score1 = result['s1_score']
  84. if score1 > 0:
  85. return result['s1_uid'], result['s1_vid']
  86. else:
  87. return None, None
  88. def find_videos_in_mysql(trace_id):
  89. """
  90. 通过 trace_id去 pq_spider_mysql 搜索视频
  91. :param trace_id:
  92. :return:
  93. """
  94. sql = "select video_id, video_title from crawler_video where out_user_id = '{}' limit 10;".format(trace_id)
  95. out_video_list = select(sql=sql)
  96. if len(out_video_list) > 0:
  97. vid_list = [i[0] for i in out_video_list if i[0] != 0]
  98. vid_list = [vid_list[-1]]
  99. dir_path = os.path.join(os.getcwd(), 'applications', 'static', "out_videos")
  100. os.makedirs(os.path.dirname(dir_path), exist_ok=True)
  101. done_list = os.listdir(dir_path)
  102. process_list = [
  103. (
  104. i[1],
  105. trace_id,
  106. os.path.join(dir_path, "{}.json".format(i[0]))
  107. ) for i in out_video_list if not "{}.json".format(i[0]) in done_list
  108. ]
  109. if process_list:
  110. ask_kimi_and_save_to_local(process_list[0])
  111. logging(
  112. code="2003",
  113. trace_id=trace_id,
  114. info="recall_search_list",
  115. function="find_videos_in_mysql",
  116. data=vid_list
  117. )
  118. return {
  119. "search_videos": "success",
  120. "trace_id": trace_id,
  121. "video_list": vid_list
  122. }
  123. else:
  124. return {
  125. "search_videos": "failed",
  126. "trace_id": trace_id,
  127. "video_list": []
  128. }
  129. def clean_title(strings):
  130. """
  131. :param strings:
  132. :return:
  133. """
  134. return (
  135. strings.strip()
  136. .replace("\n", "")
  137. .replace("/", "")
  138. .replace("\r", "")
  139. .replace("#", "")
  140. .replace(".", "。")
  141. .replace("\\", "")
  142. .replace("&NBSP", "")
  143. .replace(":", "")
  144. .replace("*", "")
  145. .replace("?", "")
  146. .replace("?", "")
  147. .replace('"', "")
  148. .replace("<", "")
  149. .replace(">", "")
  150. .replace("|", "")
  151. .replace(" ", "")
  152. .replace('"', "")
  153. .replace("'", "")
  154. )