|
@@ -0,0 +1,75 @@
|
|
|
+"""
|
|
|
+@author: luojunhui
|
|
|
+"""
|
|
|
+import time
|
|
|
+import json
|
|
|
+import schedule
|
|
|
+import pymysql
|
|
|
+import requests
|
|
|
+from datetime import datetime, timedelta
|
|
|
+
|
|
|
+
|
|
|
def find_defeat_info():
    """
    Look up today's videos that are still marked as failed.

    Selects rows from ``long_articles_video`` where ``success = 0`` and
    ``update_time`` lies strictly between the start of the current day and
    one hour ago. Both bounds are computed in Beijing time (UTC+8), so the
    window no longer depends on the timezone of the host running the script.

    NOTE(review): assumes ``update_time`` is stored as Beijing wall-clock
    time -- confirm against the table definition.

    :return: the rows returned by ``cursor.fetchall()``; each row holds
        ``(trace_id, article_title, article_text, gh_id, account_name)``.
        Empty when nothing matches.
    """
    # Local import: only this function needs it, and the file-level import
    # block only brings in ``datetime`` and ``timedelta``.
    from datetime import timezone

    beijing_now = datetime.now(timezone(timedelta(hours=8)))
    # Only rows untouched for at least one hour are considered failed.
    cutoff = (beijing_now - timedelta(hours=1)).strftime("%Y-%m-%d %H:%M:%S")
    # Midnight of the current Beijing day. During the first hour of the day
    # the cutoff is earlier than this bound, so the window is empty.
    day_start = beijing_now.strftime("%Y-%m-%d")

    # Values are passed as driver parameters instead of being formatted
    # into the SQL text.
    select_sql = """
        select trace_id, article_title, article_text, gh_id, account_name
        from long_articles_video
        where update_time < %s and update_time > %s and success = 0;"""

    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to environment variables or a secrets store.
    connection = pymysql.connect(
        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host (internal address)
        port=3306,  # port
        user="crawler",  # MySQL user name
        passwd="crawler123456@",  # MySQL password
        db="piaoquan-crawler",  # database name
        charset="utf8mb4",  # connection character set
    )
    try:
        with connection.cursor() as cursor:
            cursor.execute(select_sql, (cutoff, day_start))
            fail_list = cursor.fetchall()
    finally:
        # Always release the connection; this function is called every hour
        # by a long-running process.
        connection.close()
    return fail_list
|
|
|
+
|
|
|
+
|
|
|
def job2():
    """
    Scheduled task: re-submit every failed video to the re-search service.

    Fetches the failed rows via ``find_defeat_info()`` and POSTs each one as
    JSON to the ``re_search_videos`` endpoint, printing the raw response,
    the elapsed time in seconds and the pretty-printed JSON payload. A
    failure on one row (network error, timeout, non-JSON body) is reported
    and skipped so that the remaining rows are still processed and the
    scheduler loop that calls this job keeps running.

    :return: None
    """
    fail_list = find_defeat_info()
    if not fail_list:
        print("No videos")
        return

    # Loop-invariant request settings.
    url = "http://61.48.133.26:8111/re_search_videos"
    header = {
        "Content-Type": "application/json",
    }

    # Column order follows the SELECT in find_defeat_info().
    for trace_id, title, content, gh_id, account_name in fail_list:
        params = {
            "trace_id": trace_id,
            "title": title,
            "ghId": gh_id,
            "content": content,
            "accountName": account_name,
        }
        started = time.time()
        try:
            response = requests.post(url, json=params, headers=header, timeout=600)
        except requests.RequestException as exc:
            # Connection errors and timeouts must not abort the whole batch.
            print(f"re_search_videos request failed for trace_id={trace_id}: {exc}")
        else:
            elapsed = time.time() - started
            print(response.text)
            print(elapsed)
            try:
                print(json.dumps(response.json(), ensure_ascii=False, indent=4))
            except ValueError:
                # The body was not valid JSON (e.g. an HTML error page); the
                # raw text has already been printed above.
                print(f"non-JSON response for trace_id={trace_id}")
        # Pause between submissions, whether or not this one succeeded.
        time.sleep(20)
|
|
|
+
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Register job2 to run once per hour.
    hourly = schedule.every().hour
    hourly.do(job2)

    # Poll the scheduler once a second, forever.
    while True:
        schedule.run_pending()
        time.sleep(1)
|