Explorar o código

change backup url

luojunhui hai 1 mes
pai
achega
6cd35feadb

+ 3 - 2
applications/utils/common.py

@@ -1,6 +1,7 @@
 """
 @author: luojunhui
 """
+
 import hashlib
 
 
@@ -11,11 +12,11 @@ def str_to_md5(strings):
     :return:
     """
     # 将字符串转换为字节
-    original_bytes = strings.encode('utf-8')
+    original_bytes = strings.encode("utf-8")
     # 创建一个md5 hash对象
     md5_hash = hashlib.md5()
     # 更新hash对象,传入原始字节
     md5_hash.update(original_bytes)
     # 获取16进制形式的MD5哈希值
     md5_value = md5_hash.hexdigest()
-    return md5_value
+    return md5_value

+ 24 - 25
applications/utils/download_video.py

@@ -1,6 +1,7 @@
 """
 @author: luojunhui
 """
+
 import os
 import re
 import html
@@ -14,10 +15,7 @@ from fake_useragent import FakeUserAgent
 from applications.utils.common import str_to_md5
 from config import decrypt_key_path
 
-headers = {
-  'Content-Type': 'application/json',
-  'User-Agent': FakeUserAgent().chrome
-}
+headers = {"Content-Type": "application/json", "User-Agent": FakeUserAgent().chrome}
 
 
 def extract_video_url_from_article(article_url):
@@ -27,7 +25,7 @@ def extract_video_url_from_article(article_url):
     """
     response = requests.get(
         url=article_url,
-        headers={'User-Agent': FakeUserAgent().random},
+        headers={"User-Agent": FakeUserAgent().random},
     )
     html_text = response.text
     w = re.search(
@@ -53,18 +51,18 @@ def download_gzh_video(article_url):
         return
     save_path = "static/{}.mp4".format(str_to_md5(video_url))
     headers = {
-        'Accept': '*/*',
-        'Accept-Language': 'zh,zh-CN;q=0.9',
-        'Connection': 'keep-alive',
-        'Origin': 'https://mp.weixin.qq.com',
-        'Referer': 'https://mp.weixin.qq.com/',
-        'Sec-Fetch-Dest': 'video',
-        'Sec-Fetch-Mode': 'cors',
-        'Sec-Fetch-Site': 'cross-site',
-        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
-        'sec-ch-ua': '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
-        'sec-ch-ua-mobile': '?0',
-        'sec-ch-ua-platform': '"macOS"'
+        "Accept": "*/*",
+        "Accept-Language": "zh,zh-CN;q=0.9",
+        "Connection": "keep-alive",
+        "Origin": "https://mp.weixin.qq.com",
+        "Referer": "https://mp.weixin.qq.com/",
+        "Sec-Fetch-Dest": "video",
+        "Sec-Fetch-Mode": "cors",
+        "Sec-Fetch-Site": "cross-site",
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
+        "sec-ch-ua": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
+        "sec-ch-ua-mobile": "?0",
+        "sec-ch-ua-platform": '"macOS"',
     }
     res = requests.get(video_url, headers=headers)
     with open(save_path, "wb") as f:
@@ -89,7 +87,7 @@ def download_sph_video(download_url, key):
         with requests.get(download_url, headers=headers, stream=True) as response:
             response.raise_for_status()
 
-            with open(encrypted_path, 'wb') as f:
+            with open(encrypted_path, "wb") as f:
                 for chunk in response.iter_content(chunk_size=8192):
                     if chunk:  # filter out keep-alive chunks
                         f.write(chunk)
@@ -109,7 +107,7 @@ def download_sph_video(download_url, key):
         raise RuntimeError(f"Video processing failed: {str(e)}") from e
 
 
-def decrypt_sph_video(video_path: str, key: int,  save_path: str) -> None:
+def decrypt_sph_video(video_path: str, key: int, save_path: str) -> None:
     """
     Decrypt video file using C library.
     Args:
@@ -124,19 +122,20 @@ def decrypt_sph_video(video_path: str, key: int,  save_path: str) -> None:
 
     try:
         lib = ffi.dlopen(decrypt_key_path)
-        ffi.cdef('void decrypt(unsigned char *data, const size_t data_length, const uint32_t key);')
+        ffi.cdef(
+            "void decrypt(unsigned char *data, const size_t data_length, const uint32_t key);"
+        )
 
-        with open(video_path, 'rb') as f:
+        with open(video_path, "rb") as f:
             encrypted_data = f.read()
 
-        c_data = ffi.new('unsigned char[]', list(encrypted_data))
-        lib.decrypt(c_data, 2 ** 17, int(key))
+        c_data = ffi.new("unsigned char[]", list(encrypted_data))
+        lib.decrypt(c_data, 2**17, int(key))
         decrypted_data = bytes(ffi.buffer(c_data, len(encrypted_data))[:])
 
-        with open(save_path, 'wb') as f:
+        with open(save_path, "wb") as f:
             f.write(decrypted_data)
 
     except Exception as e:
         print(traceback.format_exc())
         raise RuntimeError(f"Decryption failed: {str(e)}") from e
-

+ 4 - 3
applications/utils/item.py

@@ -1,10 +1,11 @@
 """
 @author: luojunhui
 """
+
 import time
 
 default_single_video_table_fields = {
-    "platform": 'gzh',
+    "platform": "gzh",
     "article_title": None,
     "content_trace_id": None,
     "read_cnt": 0,
@@ -22,7 +23,7 @@ default_single_video_table_fields = {
     "like_cnt": 0,
     "bad_status": 0,
     "tags": None,
-    "video_oss_path": None
+    "video_oss_path": None,
 }
 
 
@@ -65,4 +66,4 @@ class Item(object):
             case "video":
                 self.check_video_item()
             case "article":
-                self.check_article_item()
+                self.check_article_item()

+ 21 - 20
applications/utils/save_to_db.py

@@ -1,6 +1,7 @@
 """
 @author: luojunhui
 """
+
 import traceback
 from applications.aliyunLogApi import log
 
@@ -19,33 +20,33 @@ def insert_into_single_video_source_table(db_client, video_item):
         db_client.save(
             query=insert_sql,
             params=(
-                video_item['content_trace_id'],
-                video_item['article_title'],
-                video_item['out_account_id'],
-                video_item['out_account_name'],
-                video_item['read_cnt'],
-                video_item['like_cnt'],
-                video_item['article_url'],
-                video_item['cover_url'],
-                video_item['video_oss_path'],
-                video_item['publish_timestamp'],
-                video_item['crawler_timestamp'],
-                video_item['url_unique_md5'],
-                video_item['category'],
-                video_item['tags'],
-                video_item['platform'],
-                video_item['source_account'],
+                video_item["content_trace_id"],
+                video_item["article_title"],
+                video_item["out_account_id"],
+                video_item["out_account_name"],
+                video_item["read_cnt"],
+                video_item["like_cnt"],
+                video_item["article_url"],
+                video_item["cover_url"],
+                video_item["video_oss_path"],
+                video_item["publish_timestamp"],
+                video_item["crawler_timestamp"],
+                video_item["url_unique_md5"],
+                video_item["category"],
+                video_item["tags"],
+                video_item["platform"],
+                video_item["source_account"],
             ),
         )
     except Exception as e:
         log(
-            task="{}_video_crawler".format(video_item['platform']),
+            task="{}_video_crawler".format(video_item["platform"]),
             function="save_each_video",
             message="save video failed",
             data={
                 "error": str(e),
                 "traceback": traceback.format_exc(),
-                "video_id": video_item['url_unique_md5'],
-                "oss_path": video_item['video_oss_path']
-            }
+                "video_id": video_item["url_unique_md5"],
+                "oss_path": video_item["video_oss_path"],
+            },
         )

+ 2 - 1
applications/utils/upload.py

@@ -1,6 +1,7 @@
 """
 @author: luojunhui
 """
+
 import oss2
 from uuid import uuid4
 
@@ -19,4 +20,4 @@ def upload_to_oss(local_video_path):
         oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name
     )
     bucket.put_object_from_file(key=oss_video_key, filename=local_video_path)
-    return oss_video_key
+    return oss_video_key