
Fix UTC time handling
Deploy the research scheduled task

罗俊辉 10 months ago
parent commit 270d5f07f0
4 changed files with 85 additions and 68 deletions
  1. match_rate_app.py (+4 -2)
  2. re_search.py (+0 -45)
  3. research_app.py (+75 -0)
  4. t.py (+6 -21)

+ 4 - 2
match_rate_app.py

@@ -3,7 +3,7 @@
 """
 import time
 import schedule
-from datetime import datetime
+from datetime import datetime, timedelta
 
 from functions import MatchRate, RateDetail
 from feishu import Feishu
@@ -20,10 +20,12 @@ def job():
     today_str = datetime.today().strftime("%Y%m%d")
     e_time = MR.generate_today_stamp(today_str)
     s_time = MR.generate_yesterday_stamp(today_str)
+    utc_dt = datetime.utcfromtimestamp(e_time / 1000)
+    beijing_time = utc_dt + timedelta(hours=8)
     result_list = MR.match_rate(start_time_stamp=s_time, end_time_stamp=e_time)
     result_obj = RD.rate_and_error_list(result_list)
     rate_list = [
-        datetime.utcfromtimestamp(e_time / 1000).strftime("%Y%m%d"),
+        beijing_time.strftime("%Y-%m-%d"),
         result_obj['total_count'],
         result_obj['success_count'],
         result_obj['success_count'] / result_obj['total_count'] if result_obj['total_count'] else None,
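
The change above stops formatting the raw UTC timestamp directly and instead shifts it to Beijing time (UTC+8) before building the date string. A minimal standalone sketch of the same conversion (the timestamp value is computed here only for illustration):

import time
from datetime import datetime, timedelta

# Millisecond epoch timestamp, as generate_today_stamp produces (illustrative).
e_time = int(time.time() * 1000)

# Naive UTC datetime, then a fixed +8 hour offset for Beijing time.
utc_dt = datetime.utcfromtimestamp(e_time / 1000)
beijing_time = utc_dt + timedelta(hours=8)

print(beijing_time.strftime("%Y-%m-%d"))  # the date string written into rate_list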

+ 0 - 45
re_search.py

@@ -1,45 +0,0 @@
-"""
-@author: luojunhui
-"""
-import time
-import json
-import pymysql
-import requests
-
-
-trace_id = "search-5b5343dc-b6a8-4f65-9e6b-e04b9961e530-1716955405"
-sql = f"""select trace_id, article_title, article_text, gh_id, account_name from long_articles_video where trace_id = '{trace_id}';"""
-connection = pymysql.connect(
-        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
-        port=3306,  # 端口号
-        user="crawler",  # mysql用户名
-        passwd="crawler123456@",  # mysql用户登录密码
-        db="piaoquan-crawler",  # 数据库名
-        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
-    )
-cursor = connection.cursor()
-cursor.execute(sql)
-out_video_list = cursor.fetchall()
-result = out_video_list[0]
-params = {
-    "trace_id": result[0],
-    "title": result[1],
-    "ghId": result[3],
-    "content": result[2],
-    "accountName": result[4]
-
-}
-# print(params)
-url = "http://localhost:8111/re_search_videos"
-
-
-a = time.time()
-header = {
-    "Content-Type": "application/json",
-}
-
-response = requests.post(url, json=params, headers=header, timeout=600)
-b = time.time()
-print(response.text)
-print(b - a)
-print(json.dumps(response.json(), ensure_ascii=False, indent=4))
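
re_search.py was a one-off script that re-posted a single hard-coded trace_id to a locally running re_search service; the scheduled job added below replaces it. For reference, a sketch of the request it sent, with placeholder field values rather than real records:

import requests

# JSON body accepted by /re_search_videos, per the deleted script
# (placeholder values, not real data).
params = {
    "trace_id": "search-xxxx",
    "title": "article title",
    "ghId": "gh_xxxx",
    "content": "article text",
    "accountName": "account name",
}

response = requests.post(
    "http://localhost:8111/re_search_videos",
    json=params,
    headers={"Content-Type": "application/json"},
    timeout=600,
)
print(response.json())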

+ 75 - 0
research_app.py

@@ -0,0 +1,75 @@
+"""
+@author: luojunhui
+"""
+import time
+import json
+import schedule
+import pymysql
+import requests
+from datetime import datetime, timedelta
+
+
+def find_defeat_info():
+    """
+    Find failed videos
+    :return:
+    """
+
+    now_dt = datetime.utcfromtimestamp(int(time.time()) - 1 * 60 * 60)
+    beijing_time = now_dt + timedelta(hours=8)
+    today_dt = datetime.today().strftime("%Y-%m-%d")
+    select_sql = f"""
+    select trace_id, article_title, article_text, gh_id, account_name 
+    from long_articles_video 
+    where update_time < '{beijing_time}' and update_time > '{today_dt}' and success = 0;"""
+    connection = pymysql.connect(
+        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
+        port=3306,  # 端口号
+        user="crawler",  # mysql用户名
+        passwd="crawler123456@",  # mysql用户登录密码
+        db="piaoquan-crawler",  # 数据库名
+        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
+    )
+    cursor = connection.cursor()
+    cursor.execute(select_sql)
+    fail_list = cursor.fetchall()
+    return fail_list
+
+
+def job2():
+    """
+    Scheduled job
+    :return:
+    """
+    fail_list = find_defeat_info()
+    if fail_list:
+        for result in fail_list:
+            params = {
+                "trace_id": result[0],
+                "title": result[1],
+                "ghId": result[3],
+                "content": result[2],
+                "accountName": result[4]
+
+            }
+            url = "http://61.48.133.26:8111/re_search_videos"
+            a = time.time()
+            header = {
+                "Content-Type": "application/json",
+            }
+
+            response = requests.post(url, json=params, headers=header, timeout=600)
+            b = time.time()
+            print(response.text)
+            print(b - a)
+            print(json.dumps(response.json(), ensure_ascii=False, indent=4))
+            time.sleep(20)
+    else:
+        print("No videos")
+
+
+if __name__ == '__main__':
+    schedule.every().hour.do(job2)
+    while True:
+        schedule.run_pending()
+        time.sleep(1)
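
find_defeat_info interpolates both datetime bounds into the SQL string with an f-string. pymysql also accepts %s placeholders through cursor.execute(sql, args), which escapes the bound values itself; a sketch of the same query in that style, under the same assumptions as the committed code (connection setup omitted):

from datetime import datetime, timedelta

def find_defeat_info(connection):
    """Fetch today's videos that failed more than an hour ago (sketch)."""
    beijing_now = datetime.utcnow() + timedelta(hours=8)
    one_hour_ago = beijing_now - timedelta(hours=1)
    today_start = beijing_now.strftime("%Y-%m-%d")
    select_sql = """
        select trace_id, article_title, article_text, gh_id, account_name
        from long_articles_video
        where update_time < %s and update_time > %s and success = 0;
    """
    with connection.cursor() as cursor:
        # pymysql quotes and escapes the bound values.
        cursor.execute(select_sql, (one_hour_ago, today_start))
        return cursor.fetchall()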

+ 6 - 21
t.py

@@ -1,23 +1,8 @@
-import pandas as pd
-import random
+from datetime import datetime, timedelta
+import time
 
-df = pd.read_excel("result.xlsx")
-columns = df.columns
-data_list = df.values.tolist()
-print(len(data_list))
-
-
-# 定义范围
-start, end = 0, 531
-
-# 从1到532中随机取出30个不重复的数字
-random_numbers = random.sample(range(start, end + 1), 30)
-
-result = []
-for i in random_numbers:
-    print(data_list[i])
-    result.append(data_list[i])
-
-out_df = pd.DataFrame(result, columns=columns)
-out_df.to_excel("test.xlsx", index=False)
+ts = time.time()
 
+utc_dt = datetime.utcfromtimestamp(ts)
+beijing_time = utc_dt + timedelta(hours=8)
+print(beijing_time.strftime("%Y%m%d %H:%M:%S"))
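
t.py keeps the naive utcfromtimestamp-plus-timedelta pattern. An equivalent, timezone-aware sketch using a fixed UTC+8 tzinfo (datetime.utcfromtimestamp is deprecated as of Python 3.12):

import time
from datetime import datetime, timezone, timedelta

# Fixed UTC+8 offset for Beijing time.
BEIJING = timezone(timedelta(hours=8))

beijing_time = datetime.fromtimestamp(time.time(), tz=BEIJING)
print(beijing_time.strftime("%Y%m%d %H:%M:%S"))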