wx_search_task.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import time
  6. import requests
  7. from uuid import uuid4
  8. from applications.config import db_config
  9. from applications.functions import whisper
  10. class wxGenerateTask(object):
  11. """
  12. 视频匹配文章流程
  13. 流程
  14. 1. 拿视频id,标题等信息匹配账号
  15. """
  16. def __init__(self, mysql_client):
  17. """
  18. :param mysql_client mysql服务池
  19. """
  20. self.mysql_client = mysql_client
  21. async def whisper_task(self):
  22. """
  23. 执行定时任务,把库里面的视频转文本
  24. :return:
  25. """
  26. select_sql = f"""SELECT video_id FROM {db_config} WHERE status_code = 0 ORDER BY id ASC limit 1;"""
  27. video_list = await self.mysql_client.select(select_sql)
  28. async def whisper_and_update(video_id, mysql_client):
  29. """
  30. whisper处理视频并且把信息更新到mysql表中
  31. :param video_id:
  32. :param mysql_client:
  33. :return:
  34. """
  35. try:
  36. w_response = whisper(video_id)
  37. except:
  38. w_response = {"text": "whisper failed"}
  39. print(w_response)
  40. text = w_response['text'].replace("'", "")
  41. update_sql = f"""
  42. UPDATE {db_config}
  43. SET
  44. video_text = %s,
  45. status_code = %s
  46. WHERE video_id = %s;
  47. """
  48. print(update_sql)
  49. await mysql_client.async_insert(sql=update_sql, params=(text, 1, video_id))
  50. for vid in video_list:
  51. await whisper_and_update(video_id=vid[0], mysql_client=self.mysql_client)
  52. @classmethod
  53. def search_articles(cls, title):
  54. """
  55. search articles in wx
  56. :return:
  57. """
  58. url = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
  59. payload = json.dumps({
  60. "keyword": title,
  61. "cursor": "1"
  62. })
  63. headers = {
  64. 'Content-Type': 'application/json'
  65. }
  66. response = requests.request("POST", url, headers=headers, data=payload)
  67. return response.json()
  68. @classmethod
  69. def get_article_text(cls, content_link):
  70. """
  71. 获取文章
  72. :param content_link:
  73. :return:
  74. """
  75. url = "http://8.217.190.241:8888/crawler/wei_xin/detail"
  76. payload = json.dumps({
  77. "content_link": content_link,
  78. "is_count": False,
  79. "is_ad": False
  80. })
  81. headers = {
  82. 'Content-Type': 'application/json'
  83. }
  84. response = requests.request("POST", url, headers=headers, data=payload)
  85. return response.json()
  86. if __name__ == '__main__':
  87. wgt = wxGenerateTask(mysql_client="client")
  88. text = wgt.get_article_text("https://mp.weixin.qq.com/s/8lk6A3NuGduVb4N6uUaBNg")
  89. img_list = text['data']['data']['image_url_list']
  90. img_list = [i['image_url'] for i in img_list]
  91. cover = img_list[0]
  92. title = text['data']['data']['title']
  93. res = {
  94. "text": text['data']['data']['body_text'],
  95. "title": title,
  96. "cover": cover,
  97. "img_list": img_list
  98. }
  99. print(json.dumps(res, ensure_ascii=False))