# encoding: utf-8
"""
matchArticle_deal.py

@author: luojunhui
"""
  5. import json
  6. import time
  7. import requests
  8. from uuid import uuid4
  9. from applications.config import db_config
  10. from applications.functions import whisper
  11. from applications.pipeline import question_fission, search_materials, summary_articles, generate_text
  12. class MatchArticlesTask(object):
  13. """
  14. 视频匹配文章流程
  15. 流程
  16. 1. 拿视频id,标题等信息匹配账号
  17. 2. 账号匹配成功后,使用 AI Search 获取文章的生产资料
  18. 3. 通过GPT4, 腾讯元宝等AI 优化文章
  19. 4. 生成一篇文章,包含标题,文本,封面, 插图, 以及匹配到到小程序
  20. """
  21. def __init__(self, mysql_client):
  22. """
  23. :param mysql_client mysql服务池
  24. """
  25. self.mysql_client = mysql_client
  26. async def whisper_task(self):
  27. """
  28. 执行定时任务,把库里面的视频转文本
  29. :return:
  30. """
  31. select_sql = f"""SELECT video_id FROM {db_config} WHERE status_code = 0 ORDER BY id ASC limit 10;"""
  32. video_list = await self.mysql_client.select(select_sql)
  33. async def whisper_and_update(video_id, mysql_client):
  34. """
  35. whisper处理视频并且把信息更新到mysql表中
  36. :param video_id:
  37. :param mysql_client:
  38. :return:
  39. """
  40. w_response = whisper(video_id)
  41. print(w_response)
  42. text = w_response['text']
  43. update_sql = f"""
  44. UPDATE {db_config}
  45. SET
  46. video_text = '{text}',
  47. status_code = 1
  48. WHERE video_id = {video_id};
  49. """
  50. await mysql_client.async_insert(sql=update_sql)
  51. for vid in video_list:
  52. await whisper_and_update(video_id=vid[0], mysql_client=self.mysql_client)
  53. async def materials_task(self):
  54. """
  55. 获取task的材料
  56. :return:
  57. """
  58. select_sql = f"""SELECT task_id, video_title, video_text FROM {db_config} WHERE status_code = 1 ORDER BY id ASC limit 10;"""
  59. task_list = await self.mysql_client.select(select_sql)
  60. async def find_material(task_tuple, mysql_client):
  61. task_id, title, text = task_tuple
  62. # 先用视频标题作为query, 后续可逐步优化
  63. question_dict = question_fission(title)
  64. material_dict = {}
  65. for question_key in question_dict:
  66. question = question_dict[question_key]
  67. material = generate_text(question)
  68. material_dict[question] = material_dict
  69. material_result = json.dumps(material_dict, ensure_ascii=False)
  70. update_sql = f"""
  71. UPDATE {db_config}
  72. SET materials = '{material_result}', status_code = 2
  73. WHERE task_id = '{task_id}'
  74. """
  75. await mysql_client.async_insert(sql=update_sql)
  76. for task in task_list:
  77. await find_material(task, self.mysql_client)
  78. async def ai_task(self):
  79. """
  80. 通过ai工具和材料来生成文章
  81. :return:
  82. """
  83. select_sql = f"""SELECT task_id, video_title, materials FROM '{db_config}' WHERE status_code = 2 ORDER BY id ASC limit 10;"""
  84. task_list = await self.mysql_client.select(sql=select_sql)
  85. async def ai_generate_text(task_tuple, mysql_client):
  86. task_id, video_title, materials = task_tuple
  87. imgs, ai_title, ai_text = summary_articles(materials)
  88. update_sql = f"""
  89. UPDATE {db_config}
  90. SET ai_text = '{ai_text}', ai_title = '{ai_title}', img_list = '{json.dumps(imgs, ensure_ascii=False)}',status_code = 3
  91. WHERE task_id = '{task_id}';
  92. """
  93. for task in task_list:
  94. await ai_generate_text(task, self.mysql_client)
  95. class MatchArticlesV1(object):
  96. """
  97. 接受请求,并且把数据存储到MySQL服务器中
  98. """
  99. def __init__(self, params, mysql_client):
  100. self.title = None
  101. self.video_id = None
  102. self.params = params
  103. self.mysql_client = mysql_client
  104. def check_params(self):
  105. """
  106. params check
  107. """
  108. try:
  109. self.video_id = self.params['videoId']
  110. self.title = self.params['title']
  111. return None
  112. except AttributeError as e:
  113. response = {
  114. "code": 0,
  115. "error": "Params Error",
  116. "msg": "Params: {} is not correct".format(e)
  117. }
  118. return response
  119. async def record(self):
  120. """
  121. 将数据存储到服务中
  122. :return:
  123. """
  124. request_id = "Article_{}_{}".format(uuid4(), int(time.time()))
  125. request_time = int(time.time())
  126. insert_sql = f"""
  127. INSERT INTO {db_config}
  128. (video_id, task_id, video_title, request_time)
  129. VALUES
  130. ({self.video_id}, '{request_id}', '{self.title}', {request_time})
  131. """
  132. await self.mysql_client.async_insert(sql=insert_sql)
  133. return request_id
  134. async def deal(self):
  135. """
  136. deal function
  137. :return:
  138. """
  139. params_error = self.check_params()
  140. if params_error:
  141. return params_error
  142. else:
  143. task_id = await self.record()
  144. res = {
  145. "status": "success",
  146. "task_id": task_id
  147. }
  148. return res
  149. class MatchArticlesV2(object):
  150. """
  151. 获取视频信息
  152. """
  153. def __init__(self, params, mysql_client):
  154. self.task_id = None
  155. self.params = params
  156. self.mysql_client = mysql_client
  157. def check_params(self):
  158. """
  159. params check
  160. """
  161. try:
  162. self.task_id = self.params['taskId']
  163. return None
  164. except AttributeError as e:
  165. response = {
  166. "code": 0,
  167. "error": "Params Error",
  168. "msg": "Params: {} is not correct".format(e)
  169. }
  170. return response
  171. @classmethod
  172. def get_basic_video_info(cls, video_id):
  173. """
  174. 获取视频信息
  175. :return:
  176. """
  177. url = "http://localhost:8888/singleVideo"
  178. body = {
  179. "videoId": video_id
  180. }
  181. headers = {
  182. "Content-Type": "application/json"
  183. }
  184. response = requests.post(url=url, json=body, headers=headers)
  185. return response.json()
  186. async def recall_articles(self):
  187. """
  188. 从表中召回视频
  189. :return:
  190. """
  191. select_sql = f"""
  192. SELECT video_id, cover, img_list, ai_text, ai_title, status_code
  193. FROM {db_config}
  194. WHERE task_id = '{self.task_id}';
  195. """
  196. print(select_sql)
  197. result = await self.mysql_client.select(select_sql)
  198. print(result)
  199. video_id, cover, images, ai_text, ai_title, status_code = result[0]
  200. match status_code:
  201. case 0:
  202. return {
  203. "task_id": self.task_id,
  204. "code": 0,
  205. "msg": "未处理"
  206. }
  207. case 1:
  208. return {
  209. "task_id": self.task_id,
  210. "code": 1,
  211. "msg": "处理中, 已经用whisper生成视频文本"
  212. }
  213. case 2:
  214. return {
  215. "task_id": self.task_id,
  216. "code": 2,
  217. "msg": "处理中, 已经用AI搜索生成资料"
  218. }
  219. case 3:
  220. result = {
  221. "title": ai_title,
  222. "cover": cover,
  223. "content": ai_text,
  224. "images": images,
  225. "videos": [
  226. self.get_basic_video_info(video_id)
  227. ]
  228. }
  229. response = {
  230. "status": "success",
  231. "article": result
  232. }
  233. return response