| 
					
				 | 
			
			
				@@ -0,0 +1,75 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+@author: luojunhui 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import json 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import schedule 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import pymysql 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import requests 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from datetime import datetime, timedelta 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def find_defeat_info(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    查找失败的视频 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    :return: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    now_dt = datetime.utcfromtimestamp(int(time.time()) - 1 * 60 * 60) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    beijing_time = now_dt + timedelta(hours=8) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    today_dt = datetime.today().strftime("%Y-%m-%d") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    select_sql = f""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    select trace_id, article_title, article_text, gh_id, account_name  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    from long_articles_video  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    where update_time < '{beijing_time}' and update_time > '{today_dt}' and success = 0;""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    connection = pymysql.connect( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        port=3306,  # 端口号 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        user="crawler",  # mysql用户名 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        passwd="crawler123456@",  # mysql用户登录密码 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        db="piaoquan-crawler",  # 数据库名 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    cursor = connection.cursor() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    cursor.execute(select_sql) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    fail_list = cursor.fetchall() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return fail_list 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def job2(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    定时任务 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    :return: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    fail_list = find_defeat_info() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if fail_list: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for result in fail_list: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            params = { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "trace_id": result[0], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "title": result[1], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "ghId": result[3], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "content": result[2], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "accountName": result[4] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            url = "http://61.48.133.26:8111/re_search_videos" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            a = time.time() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            header = { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "Content-Type": "application/json", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            response = requests.post(url, json=params, headers=header, timeout=600) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            b = time.time() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print(response.text) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print(b - a) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print(json.dumps(response.json(), ensure_ascii=False, indent=4)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            time.sleep(20) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print("No videos") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+if __name__ == '__main__': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    schedule.every().hour.do(job2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    while True: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        schedule.run_pending() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        time.sleep(1) 
			 |