123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 |
- """
- @author: luojunhui
- """
- import time
- import json
- import schedule
- import pymysql
- import requests
- from datetime import datetime, timedelta
def find_defeat_info():
    """
    Fetch today's videos that are still marked as failed.

    Selects rows from ``long_articles_video`` where ``success = 0`` and
    ``update_time`` lies after the start of the current day and more than
    one hour in the past. Both bounds are taken on a Beijing (UTC+8) clock.

    :return: the rows returned by ``cursor.fetchall()``; each row is
        ``(trace_id, article_title, article_text, gh_id, account_name)``.
    """
    # Local import: the file-level import only brings in datetime/timedelta.
    from datetime import timezone

    # Derive BOTH bounds from a single UTC+8 clock reading. The original
    # mixed an explicit UTC+8 timestamp with the host-local date, which
    # disagree whenever the host is not configured for Beijing time.
    # NOTE(review): presumably update_time is stored in Beijing time - confirm.
    beijing_now = datetime.now(timezone(timedelta(hours=8))).replace(
        tzinfo=None, microsecond=0
    )
    one_hour_ago = beijing_now - timedelta(hours=1)
    upper_bound = one_hour_ago.strftime("%Y-%m-%d %H:%M:%S")
    lower_bound = beijing_now.strftime("%Y-%m-%d")

    # Placeholders let the driver quote the values instead of an f-string.
    select_sql = """
        select trace_id, article_title, article_text, gh_id, account_name
        from long_articles_video
        where update_time < %s and update_time > %s and success = 0;"""

    # NOTE(review): credentials are hard-coded in source; they should be
    # moved to configuration or environment variables.
    connection = pymysql.connect(
        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host (intranet address)
        port=3306,  # port number
        user="crawler",  # MySQL user name
        passwd="crawler123456@",  # MySQL login password
        db="piaoquan-crawler",  # database name
        charset="utf8mb4"  # character set used for text columns
    )
    try:
        with connection.cursor() as cursor:
            cursor.execute(select_sql, (upper_bound, lower_bound))
            return cursor.fetchall()
    finally:
        # Always release the connection; this function runs once per hour
        # and previously leaked one connection per call.
        connection.close()
def job2():
    """
    Scheduled task: resubmit every failed video to the re-search service.

    Loads the failed rows via ``find_defeat_info()`` and POSTs one JSON
    request per row to the ``re_search_videos`` endpoint, printing the raw
    response, the elapsed time and the pretty-printed JSON body. A failure
    on one row (network error, timeout, non-JSON reply) is reported and the
    loop moves on, so a single bad request no longer propagates out of
    ``schedule.run_pending()`` and stops the whole process.

    :return: None
    """
    fail_list = find_defeat_info()
    if not fail_list:
        print("No videos")
        return

    # Loop-invariant request settings, built once.
    url = "http://61.48.133.26:8111/re_search_videos"
    header = {
        "Content-Type": "application/json",
    }

    for result in fail_list:
        # Row layout follows the select order:
        # (trace_id, article_title, article_text, gh_id, account_name)
        params = {
            "trace_id": result[0],
            "title": result[1],
            "ghId": result[3],
            "content": result[2],
            "accountName": result[4]
        }
        started = time.time()
        try:
            response = requests.post(url, json=params, headers=header, timeout=600)
        except requests.RequestException as exc:
            print(f"re_search_videos request failed for trace_id={result[0]}: {exc}")
        else:
            elapsed = time.time() - started
            print(response.text)
            print(elapsed)
            try:
                print(json.dumps(response.json(), ensure_ascii=False, indent=4))
            except ValueError as exc:
                # response.json() raises a ValueError subclass on a non-JSON body.
                print(f"re_search_videos returned a non-JSON body for trace_id={result[0]}: {exc}")
        # Pause between rows, as the original did after each request.
        time.sleep(20)
if __name__ == '__main__':
    # Register job2 to run once every hour.
    hourly = schedule.every().hour
    hourly.do(job2)
    # Poll the scheduler once per second, forever.
    while True:
        schedule.run_pending()
        time.sleep(1)
|