matchArticle_deal.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. # encoding: utf-8
  2. """
  3. @author: luojunhui
  4. """
  5. import json
  6. import time
  7. import requests
  8. from uuid import uuid4
  9. from spider.baidu_imgs import get_img_list
  10. from applications.config import db_config
  11. from applications.functions import whisper
  12. from applications.pipeline import question_fission, search_materials, summary_articles, generate_text
  13. class MatchArticlesTask(object):
  14. """
  15. 视频匹配文章流程
  16. 流程
  17. 1. 拿视频id,标题等信息匹配账号
  18. 2. 账号匹配成功后,使用 AI Search 获取文章的生产资料
  19. 3. 通过GPT4, 腾讯元宝等AI 优化文章
  20. 4. 生成一篇文章,包含标题,文本,封面, 插图, 以及匹配到到小程序
  21. """
  22. def __init__(self, mysql_client):
  23. """
  24. :param mysql_client mysql服务池
  25. """
  26. self.mysql_client = mysql_client
  27. async def whisper_task(self):
  28. """
  29. 执行定时任务,把库里面的视频转文本
  30. :return:
  31. """
  32. select_sql = f"""SELECT video_id FROM {db_config} WHERE status_code = 0 ORDER BY id ASC limit 1;"""
  33. video_list = await self.mysql_client.select(select_sql)
  34. async def whisper_and_update(video_id, mysql_client):
  35. """
  36. whisper处理视频并且把信息更新到mysql表中
  37. :param video_id:
  38. :param mysql_client:
  39. :return:
  40. """
  41. try:
  42. w_response = whisper(video_id)
  43. except:
  44. w_response = {"text": "whisper failed"}
  45. print(w_response)
  46. text = w_response['text'].replace("'", "")
  47. update_sql = f"""
  48. UPDATE {db_config}
  49. SET
  50. video_text = %s,
  51. status_code = %s
  52. WHERE video_id = %s;
  53. """
  54. print(update_sql)
  55. await mysql_client.async_insert(sql=update_sql, params=(text, 1, video_id))
  56. for vid in video_list:
  57. await whisper_and_update(video_id=vid[0], mysql_client=self.mysql_client)
  58. async def materials_task(self):
  59. """
  60. 获取task的材料
  61. :return:
  62. """
  63. select_sql = f"""SELECT task_id, video_title, video_text FROM {db_config} WHERE status_code = 1 ORDER BY id ASC limit 1;"""
  64. task_list = await self.mysql_client.select(select_sql)
  65. async def find_material(task_tuple, mysql_client):
  66. task_id, title, text = task_tuple
  67. # 先用视频标题作为query, 后续可逐步优化
  68. try:
  69. question_dict = question_fission(title)
  70. material_dict = {}
  71. for question_key in question_dict:
  72. question = question_dict[question_key]
  73. material = generate_text(question)
  74. cleand_question = question.replace("\n", "").replace("*", "").replace("#", "").replace(":", "").replace('"', "").replace("'", "")
  75. material_dict[cleand_question] = material
  76. material_result = json.dumps(material_dict, ensure_ascii=False)
  77. except:
  78. material_result=json.dumps({title: text}, ensure_ascii=False)
  79. update_sql = f"""
  80. UPDATE {db_config}
  81. SET materials = %s, status_code = %s
  82. WHERE task_id = %s;
  83. """
  84. print(update_sql)
  85. await mysql_client.async_insert(sql=update_sql, params=(material_result, 2, task_id))
  86. for task in task_list:
  87. await find_material(task, self.mysql_client)
  88. async def ai_task(self):
  89. """
  90. 通过ai工具和材料来生成文章
  91. :return:
  92. """
  93. select_sql = f"""SELECT task_id, video_title, materials FROM {db_config} WHERE status_code = 2 ORDER BY id ASC limit 1;"""
  94. task_list = await self.mysql_client.select(sql=select_sql)
  95. async def ai_generate_text(task_tuple, mysql_client):
  96. task_id, video_title, materials = task_tuple
  97. try:
  98. ai_title, ai_text = summary_articles(materials)
  99. except:
  100. ai_title, ai_text = video_title, "文章生成失败"
  101. imgs = get_img_list(video_title)
  102. update_sql = f"""
  103. UPDATE {db_config}
  104. SET ai_text = %s, ai_title = %s, cover = %s, img_list = %s, status_code = %s
  105. WHERE task_id = %s;
  106. """
  107. print(update_sql)
  108. await mysql_client.async_insert(
  109. sql=update_sql,
  110. params=(
  111. ai_text,
  112. ai_title,
  113. imgs[0],
  114. json.dumps(imgs, ensure_ascii=False),
  115. 3,
  116. task_id
  117. )
  118. )
  119. for task in task_list:
  120. await ai_generate_text(task, self.mysql_client)
  121. class MatchArticlesV1(object):
  122. """
  123. 接受请求,并且把数据存储到MySQL服务器中
  124. """
  125. def __init__(self, params, mysql_client):
  126. self.title = None
  127. self.video_id = None
  128. self.params = params
  129. self.mysql_client = mysql_client
  130. def check_params(self):
  131. """
  132. params check
  133. """
  134. try:
  135. self.video_id = self.params['videoId']
  136. self.title = self.params['title']
  137. return None
  138. except AttributeError as e:
  139. response = {
  140. "code": 0,
  141. "error": "Params Error",
  142. "msg": "Params: {} is not correct".format(e)
  143. }
  144. return response
  145. async def record(self):
  146. """
  147. 将数据存储到服务中
  148. :return:
  149. """
  150. request_id = "Article_{}_{}".format(uuid4(), int(time.time()))
  151. request_time = int(time.time())
  152. insert_sql = f"""
  153. INSERT INTO {db_config}
  154. (video_id, task_id, video_title, request_time)
  155. VALUES
  156. (%s, %s, %s, %s)
  157. """
  158. await self.mysql_client.async_insert(
  159. sql=insert_sql,
  160. params=(
  161. self.video_id,
  162. request_id,
  163. self.title,
  164. request_time
  165. )
  166. )
  167. return request_id
  168. async def deal(self):
  169. """
  170. deal function
  171. :return:
  172. """
  173. params_error = self.check_params()
  174. if params_error:
  175. return params_error
  176. else:
  177. task_id = await self.record()
  178. res = {
  179. "status": "success",
  180. "task_id": task_id
  181. }
  182. return res
  183. class MatchArticlesV2(object):
  184. """
  185. 获取视频信息
  186. """
  187. def __init__(self, params, mysql_client):
  188. self.task_id = None
  189. self.params = params
  190. self.mysql_client = mysql_client
  191. def check_params(self):
  192. """
  193. params check
  194. """
  195. try:
  196. self.task_id = self.params['taskId']
  197. return None
  198. except AttributeError as e:
  199. response = {
  200. "code": 0,
  201. "error": "Params Error",
  202. "msg": "Params: {} is not correct".format(e)
  203. }
  204. return response
  205. @classmethod
  206. def get_basic_video_info(cls, video_id):
  207. """
  208. 获取视频信息
  209. :return:
  210. """
  211. url = "http://localhost:8888/singleVideo"
  212. body = {
  213. "videoId": video_id
  214. }
  215. headers = {
  216. "Content-Type": "application/json"
  217. }
  218. response = requests.post(url=url, json=body, headers=headers)
  219. return response.json()
  220. async def recall_articles(self):
  221. """
  222. 从表中召回视频
  223. :return:
  224. """
  225. select_sql = f"""
  226. SELECT video_id, cover, img_list, ai_text, video_title, status_code
  227. FROM {db_config}
  228. WHERE task_id = '{self.task_id}';
  229. """
  230. result = await self.mysql_client.select(select_sql)
  231. video_id, cover, images, ai_text, ai_title, status_code = result[0]
  232. match status_code:
  233. case 0:
  234. return {
  235. "task_id": self.task_id,
  236. "code": 0,
  237. "msg": "未处理"
  238. }
  239. case 1:
  240. return {
  241. "task_id": self.task_id,
  242. "code": 1,
  243. "msg": "处理中, 已经用whisper生成视频文本"
  244. }
  245. case 2:
  246. return {
  247. "task_id": self.task_id,
  248. "code": 2,
  249. "msg": "处理中, 已经用AI搜索生成资料"
  250. }
  251. case 3:
  252. result = {
  253. "title": ai_title,
  254. "cover": cover,
  255. "content": ai_text,
  256. "images": json.loads(images),
  257. "videos": [
  258. self.get_basic_video_info(video_id)
  259. ]
  260. }
  261. response = {
  262. "status": "success",
  263. "article": result
  264. }
  265. return response
  266. async def deal(self):
  267. params_error = self.check_params()
  268. if params_error:
  269. return params_error
  270. else:
  271. return await self.recall_articles()