"""
@author: luojunhui
"""
- import json
- import time
- import requests
- from uuid import uuid4
- from applications.config import db_config
- from applications.functions import whisper
class wxGenerateTask(object):
    """
    Video-to-article matching workflow.

    Flow:
    1. Use the video id, title and other information to match accounts.
    """

    # Crawler endpoints used for WeChat keyword search and article detail.
    SEARCH_URL = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
    DETAIL_URL = "http://8.217.190.241:8888/crawler/wei_xin/detail"

    # Seconds to wait on the crawler before giving up. Without a timeout,
    # `requests` waits indefinitely and a stalled crawler hangs the caller.
    REQUEST_TIMEOUT = 60

    def __init__(self, mysql_client):
        """
        :param mysql_client: MySQL service pool; must expose awaitable
            ``select(sql)`` and ``async_insert(sql=..., params=...)``.
        """
        self.mysql_client = mysql_client

    async def whisper_task(self):
        """
        Run the scheduled task: transcribe pending videos to text.

        Selects at most one row (``limit 1``) whose ``status_code`` is 0,
        oldest ``id`` first, and transcribes each returned video in turn.

        :return: None
        """
        # `db_config` is interpolated as the table name (imported from
        # applications.config, not user input).
        select_sql = f"""SELECT video_id FROM {db_config} WHERE status_code = 0 ORDER BY id ASC limit 1;"""
        video_list = await self.mysql_client.select(select_sql)
        for vid in video_list:
            # The first column of each row is the video_id.
            await self._whisper_and_update(video_id=vid[0])

    async def _whisper_and_update(self, video_id):
        """
        Run whisper on one video and write the result back to MySQL.

        On a whisper failure the row is still updated, with the placeholder
        text ``"whisper failed"``, and ``status_code`` is set to 1.

        :param video_id: id of the video to transcribe
        :return: None
        """
        try:
            w_response = whisper(video_id)
        except Exception as exc:
            # Best-effort: record a placeholder instead of aborting the task.
            # Only `Exception` is caught so KeyboardInterrupt / SystemExit
            # still propagate.
            print(f"whisper failed for video_id={video_id}: {exc!r}")
            w_response = {"text": "whisper failed"}
        print(w_response)
        # NOTE(review): single quotes are stripped before storing; this looks
        # like a leftover from string-built SQL. The query below is
        # parameterized, so confirm whether it is still required.
        text = w_response['text'].replace("'", "")
        update_sql = f"""
            UPDATE {db_config}
            SET
                video_text = %s,
                status_code = %s
            WHERE video_id = %s;
        """
        print(update_sql)
        await self.mysql_client.async_insert(sql=update_sql, params=(text, 1, video_id))

    @classmethod
    def _post_json(cls, url, body):
        """
        POST ``body`` as a JSON document to ``url``.

        :param url: endpoint to call
        :param body: JSON-serializable request payload
        :return: decoded JSON body of the response
        """
        response = requests.request(
            "POST",
            url,
            headers={'Content-Type': 'application/json'},
            data=json.dumps(body),
            timeout=cls.REQUEST_TIMEOUT,
        )
        return response.json()

    @classmethod
    def search_articles(cls, title):
        """
        Search articles in wx by keyword.

        :param title: keyword sent to the crawler
        :return: decoded JSON response of the keyword endpoint
        """
        return cls._post_json(
            cls.SEARCH_URL,
            {
                "keyword": title,
                "cursor": "1",
            },
        )

    @classmethod
    def get_article_text(cls, content_link):
        """
        Fetch an article.

        :param content_link: link of the article to fetch
        :return: decoded JSON response of the detail endpoint
        """
        return cls._post_json(
            cls.DETAIL_URL,
            {
                "content_link": content_link,
                "is_count": False,
                "is_ad": False,
            },
        )
if __name__ == '__main__':
    # Manual smoke run: fetch one article from the crawler and print its
    # text, title, cover image and image list as a single JSON document.
    # The client is a placeholder string; get_article_text never uses it.
    task = wxGenerateTask(mysql_client="client")
    article = task.get_article_text("https://mp.weixin.qq.com/s/8lk6A3NuGduVb4N6uUaBNg")
    image_urls = [
        entry['image_url']
        for entry in article['data']['data']['image_url_list']
    ]
    # The first image is used as the cover.
    cover_url = image_urls[0]
    article_title = article['data']['data']['title']
    summary = dict(
        text=article['data']['data']['body_text'],
        title=article_title,
        cover=cover_url,
        img_list=image_urls,
    )
    print(json.dumps(summary, ensure_ascii=False))
|