xigua_author goes live

罗俊辉 1 year ago
parent commit 31d4d6c383

+ 1 - 3
xigua/xigua_author/__init__.py

@@ -1,3 +1 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/5/26
+from .xigua_author import XiGuaAuthor

+ 924 - 0
xigua/xigua_author/xigua_author.py

@@ -0,0 +1,924 @@
+import json
+import os
+import random
+import sys
+import string
+import time
+import uuid
+import base64
+import requests
+from fake_useragent import FakeUserAgent
+
+sys.path.append(os.getcwd())
+from common.mq import MQ
+from common import AliyunLogger, PiaoQuanPipeline
+
+
+def tunnel_proxies():
+    # Tunnel proxy host:port
+    tunnel = "q796.kdltps.com:15818"
+
+    # Username/password authentication
+    username = "t17772369458618"
+    password = "5zqcjkmy"
+    tunnel_proxies = {
+        "http": "http://%(user)s:%(pwd)s@%(proxy)s/"
+        % {"user": username, "pwd": password, "proxy": tunnel},
+        "https": "http://%(user)s:%(pwd)s@%(proxy)s/"
+        % {"user": username, "pwd": password, "proxy": tunnel},
+    }
+
+    return tunnel_proxies
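The returned mapping plugs straight into the `proxies` argument of `requests`. A minimal usage sketch (the target URL is just a placeholder):

```python
import requests

# Route a single request through the tunnel proxy configured above.
resp = requests.get(
    "https://www.ixigua.com/",  # placeholder target
    proxies=tunnel_proxies(),
    timeout=5,
)
print(resp.status_code)
```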
+
+
+def random_signature():
+    src_digits = string.digits  # digits
+    src_uppercase = string.ascii_uppercase  # uppercase letters
+    src_lowercase = string.ascii_lowercase  # lowercase letters
+    digits_num = random.randint(1, 6)
+    uppercase_num = random.randint(1, 26 - digits_num - 1)
+    lowercase_num = 26 - (digits_num + uppercase_num)
+    password = (
+        random.sample(src_digits, digits_num)
+        + random.sample(src_uppercase, uppercase_num)
+        + random.sample(src_lowercase, lowercase_num)
+    )
+    random.shuffle(password)
+    new_password = "AAAAAAAAAA" + "".join(password)[10:-4] + "AAAB"
+    new_password_start = new_password[0:18]
+    new_password_end = new_password[-7:]
+    if new_password[18] == "8":
+        new_password = new_password_start + "w" + new_password_end
+    elif new_password[18] == "9":
+        new_password = new_password_start + "x" + new_password_end
+    elif new_password[18] == "-":
+        new_password = new_password_start + "y" + new_password_end
+    elif new_password[18] == ".":
+        new_password = new_password_start + "z" + new_password_end
+    else:
+        new_password = new_password_start + "y" + new_password_end
+    return new_password
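The generated `_signature` always has the same shape: 26 characters, ten leading "A"s, an "AAAB" tail, and the 19th character rewritten to one of w/x/y/z. A quick sanity sketch of those invariants:

```python
# Check the invariants implied by random_signature() above.
sig = random_signature()
assert len(sig) == 26
assert sig.startswith("AAAAAAAAAA") and sig.endswith("AAAB")
assert sig[18] in "wxyz"  # the 19th character is always rewritten
```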
+
+
+def get_video_url(video_info):
+    """Pick the best available rendition and decode its base64-encoded URLs.
+
+    videoResource carries up to three encodings (dash_120fps, dash, normal).
+    Within an encoding, video_list holds muxed streams keyed video_4..video_1,
+    tried in that order; dynamic_video holds separate video and audio streams
+    as a fallback.
+    """
+    empty = {"video_url": "", "audio_url": "", "video_width": 0, "video_height": 0}
+
+    def pad_and_decode(url):
+        # backup_url_1 is a base64-encoded URL; restore any stripped "="
+        # padding to a multiple of 4 before decoding.
+        url += "=" * (-len(url) % 4)
+        return base64.b64decode(url).decode("utf8")
+
+    resource = video_info.get("videoResource", {})
+    for encoding in ("dash_120fps", "dash", "normal"):
+        if encoding not in resource:
+            continue
+        node = resource[encoding]
+        video_list = node.get("video_list", {})
+        for definition in ("video_4", "video_3", "video_2", "video_1"):
+            if definition in video_list:
+                item = video_list[definition]
+                url = pad_and_decode(item["backup_url_1"])
+                return {
+                    "video_url": url,
+                    # These streams are muxed: the audio URL equals the video URL.
+                    "audio_url": url,
+                    "video_width": item["vwidth"],
+                    "video_height": item["vheight"],
+                }
+        # Fall back to the newest dynamic (separate audio/video) streams.
+        dynamic = node.get("dynamic_video", {})
+        if dynamic.get("dynamic_video_list") and dynamic.get("dynamic_audio_list"):
+            video_item = dynamic["dynamic_video_list"][-1]
+            audio_item = dynamic["dynamic_audio_list"][-1]
+            return {
+                "video_url": pad_and_decode(video_item["backup_url_1"]),
+                "audio_url": pad_and_decode(audio_item["backup_url_1"]),
+                "video_width": video_item["vwidth"],
+                "video_height": video_item["vheight"],
+            }
+        # An encoding that is present but has no usable streams yields empty URLs.
+        return empty
+    return empty
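For reference, a minimal sketch of the payload shape `get_video_url` consumes; the values are fabricated placeholders, and real responses carry many more fields:

```python
import base64

demo_info = {
    "videoResource": {
        "normal": {
            "video_list": {
                "video_2": {
                    "backup_url_1": base64.b64encode(b"https://example.com/v.mp4").decode(),
                    "vwidth": 1280,
                    "vheight": 720,
                }
            }
        }
    }
}
print(get_video_url(demo_info))
# {'video_url': 'https://example.com/v.mp4', 'audio_url': 'https://example.com/v.mp4',
#  'video_width': 1280, 'video_height': 720}
```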
+
+
+def get_comment_cnt(item_id):
+    url = "https://www.ixigua.com/tlb/comment/article/v5/tab_comments/?"
+    params = {
+        "tab_index": "0",
+        "count": "10",
+        "offset": "10",
+        "group_id": str(item_id),
+        "item_id": str(item_id),
+        "aid": "1768",
+        "msToken": "50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==",
+        "X-Bogus": "DFSzswVOyGtANVeWtCLMqR/F6q9U",
+        "_signature": random_signature(),
+    }
+    headers = {
+        "authority": "www.ixigua.com",
+        "accept": "application/json, text/plain, */*",
+        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
+        "cache-control": "no-cache",
+        "cookie": "MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; passport_csrf_token=72b2574f3c99f8ba670e42df430218fd; passport_csrf_token_default=72b2574f3c99f8ba670e42df430218fd; sid_guard=c7472b508ea631823ba765a60cf8757f%7C1680867422%7C3024002%7CFri%2C+12-May-2023+11%3A37%3A04+GMT; uid_tt=c13f47d51767f616befe32fb3e9f485a; uid_tt_ss=c13f47d51767f616befe32fb3e9f485a; sid_tt=c7472b508ea631823ba765a60cf8757f; sessionid=c7472b508ea631823ba765a60cf8757f; sessionid_ss=c7472b508ea631823ba765a60cf8757f; sid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; ssid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; odin_tt=b893608d4dde2e1e8df8cd5d97a0e2fbeafc4ca762ac72ebef6e6c97e2ed19859bb01d46b4190ddd6dd17d7f9678e1de; SEARCH_CARD_MODE=7168304743566296612_0; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; tt_scid=7Pux7s634-z8DYvCM20y7KigwH5u7Rh6D9C-RROpnT.aGMEcz6Vsxp.oai47wJqa4f86; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1683858689%7Ca5223fe1500578e01e138a0d71d6444692018296c4c24f5885af174a65873c95; ixigua-a-s=3; msToken=50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==; __ac_nonce=0645dcbf0005064517440; __ac_signature=_02B4Z6wo00f01FEGmAwAAIDBKchzCGqn-MBRJpyAAHAjieFC5GEg6gGiwz.I4PRrJl7f0GcixFrExKmgt6QI1i1S-dQyofPEj2ugWTCnmKUdJQv-wYuDofeKNe8VtMtZq2aKewyUGeKU-5Ud21; ixigua-a-s=3",
+        "pragma": "no-cache",
+        "referer": f"https://www.ixigua.com/{item_id}?logTag=3c5aa86a8600b9ab8540",
+        "sec-ch-ua": '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
+        "sec-ch-ua-mobile": "?0",
+        "sec-ch-ua-platform": '"macOS"',
+        "sec-fetch-dest": "empty",
+        "sec-fetch-mode": "cors",
+        "sec-fetch-site": "same-origin",
+        "tt-anti-token": "cBITBHvmYjEygzv-f9c78c1297722cf1f559c74b084e4525ce4900bdcf9e8588f20cc7c2e3234422",
+        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35",
+        "x-secsdk-csrf-token": "000100000001f8e733cf37f0cd255a51aea9a81ff7bc0c09490cfe41ad827c3c5c18ec809279175e4d9f5553d8a5",
+    }
+    response = requests.get(
+        url=url, headers=headers, params=params, proxies=tunnel_proxies(), timeout=5
+    )
+    response.close()
+    if response.status_code != 200 or "total_number" not in response.json():
+        return 0
+    return response.json().get("total_number", 0)
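The hard-coded `msToken`, `X-Bogus`, cookie and `tt-anti-token` values are session-bound and will eventually expire, at which point the endpoint stops returning `total_number` and the function falls back to 0. A small retry wrapper (a hypothetical helper, not part of this commit) can smooth over transient proxy or timeout failures:

```python
def get_comment_cnt_with_retry(item_id, attempts=3):
    # Retry transient network failures (proxy hiccups, timeouts) with backoff.
    for i in range(attempts):
        try:
            return get_comment_cnt(item_id)
        except requests.RequestException:
            time.sleep(2 ** i)
    return 0
```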
+
+
+class XiGuaAuthor:
+    def __init__(self, platform, mode, rule_dict, env, user_list):
+        self.platform = platform
+        self.mode = mode
+        self.rule_dict = rule_dict
+        self.env = env
+        self.user_list = user_list
+        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
+        self.download_count = 0
+
+    def get_author_list(self):
+        # Fetch only a fixed quota of videos per round; stop once the quota is reached
+        max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
+        for user_dict in self.user_list:
+            if self.download_count <= max_count:
+                self.get_video_list(user_dict)
+                time.sleep(random.randint(1, 15))
+            else:
+                AliyunLogger.logging(
+                    code="2000",
+                    platform=self.platform,
+                    mode=self.mode,
+                    env=self.env,
+                    message="本轮已经抓取足够数量的视频,已经自动退出",
+                )
+                return
+
+    def get_video_list(self, user_dict):
+        offset = 0
+        signature = random_signature()
+        url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
+        while True:
+            params = {
+                "to_user_id": str(
+                    user_dict["link"].replace("https://www.ixigua.com/home/", "")
+                ),
+                "offset": str(offset),
+                "limit": "30",
+                "maxBehotTime": "0",
+                "order": "new",
+                "isHome": "0",
+                # 'msToken': 'G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==',
+                # 'X-Bogus': 'DFSzswVuEkUANjW9ShFTgR/F6qHt',
+                "_signature": signature,
+            }
+            headers = {
+                "referer": f'https://www.ixigua.com/home/{user_dict["link"].replace("https://www.ixigua.com/home/", "")}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41",
+            }
+            response = requests.get(
+                url=url,
+                headers=headers,
+                params=params,
+                proxies=tunnel_proxies(),
+                timeout=5,
+            )
+            offset += 30
+            if "data" not in response.text or response.status_code != 200:
+                AliyunLogger.logging(
+                    code="2000",
+                    platform=self.platform,
+                    mode=self.mode,
+                    env=self.env,
+                    message=f"get_videoList:{response.text}\n",
+                )
+                return
+            elif not response.json()["data"]["videoList"]:
+                AliyunLogger.logging(
+                    code="2000",
+                    platform=self.platform,
+                    mode=self.mode,
+                    env=self.env,
+                    message=f"没有更多数据啦~\n",
+                )
+                return
+            else:
+                feeds = response.json()["data"]["videoList"]
+                for video_obj in feeds:
+                    try:
+                        AliyunLogger.logging(
+                            code="1001",
+                            platform=self.platform,
+                            mode=self.mode,
+                            env=self.env,
+                            message="扫描到一条视频",
+                        )
+                        self.process_video_obj(video_obj, user_dict)
+                    except Exception as e:
+                        AliyunLogger.logging(
+                            code="3000",
+                            platform=self.platform,
+                            mode=self.mode,
+                            env=self.env,
+                            data=video_obj,
+                            message="抓取单条视频异常, 报错原因是: {}".format(e),
+                        )
+
+    def process_video_obj(self, video_obj, user_dict):
+        trace_id = self.platform + str(uuid.uuid1())
+        item_id = video_obj.get("item_id", "")
+        if not item_id:
+            AliyunLogger.logging(
+                code="2005",
+                platform=self.platform,
+                mode=self.mode,
+                env=self.env,
+                message="无效视频",
+                data=video_obj,
+                trace_id=trace_id,
+            )
+            return
+        # Fetch full video info; get_video_info returns None on failure
+        video_dict = self.get_video_info(item_id=item_id, trace_id=trace_id)
+        if not video_dict:
+            return
+        video_dict["out_user_id"] = video_dict["user_id"]
+        video_dict["platform"] = self.platform
+        video_dict["strategy"] = self.mode
+        video_dict["out_video_id"] = video_dict["video_id"]
+        video_dict["width"] = video_dict["video_width"]
+        video_dict["height"] = video_dict["video_height"]
+        video_dict["crawler_rule"] = json.dumps(self.rule_dict)
+        video_dict["user_id"] = user_dict["uid"]
+        video_dict["publish_time"] = video_dict["publish_time_str"]
+        video_dict["strategy_type"] = self.mode
+        video_dict["update_time_stamp"] = int(time.time())
+        pipeline = PiaoQuanPipeline(
+            platform=self.platform,
+            mode=self.mode,
+            rule_dict=self.rule_dict,
+            env=self.env,
+            item=video_dict,
+            trace_id=trace_id,
+        )
+        flag = pipeline.process_item()
+        if flag:
+            print(json.dumps(video_dict, ensure_ascii=False, indent=4))
+            self.mq.send_msg(video_dict)
+            self.download_count += 1
+            AliyunLogger.logging(
+                code="1002",
+                platform=self.platform,
+                mode=self.mode,
+                env=self.env,
+                data=video_dict,
+                trace_id=trace_id,
+                message="成功发送 MQ 至 ETL",
+            )
+
+    def get_video_info(self, item_id, trace_id):
+        url = "https://www.ixigua.com/api/mixVideo/information?"
+        headers = {
+            "accept-encoding": "gzip, deflate",
+            "accept-language": "zh-CN,zh-Hans;q=0.9",
+            "user-agent": FakeUserAgent().random,
+            "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
+        }
+        params = {
+            "mixId": str(item_id),
+            "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC"
+            "NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
+            "X-Bogus": "DFSzswVupYTANCJOSBk0P53WxM-r",
+            "_signature": "_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px"
+            "fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94",
+        }
+        cookies = {
+            "ixigua-a-s": "1",
+            "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB"
+            "NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
+            "ttwid": "1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7"
+            "6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8",
+            "tt_scid": "QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3",
+            "MONITOR_WEB_ID": "0a49204a-7af5-4e96-95f0-f4bafb7450ad",
+            "__ac_nonce": "06304878000964fdad287",
+            "__ac_signature": "_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb"
+            "FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8",
+            "ttcid": "e56fabf6e85d4adf9e4d91902496a0e882",
+            "_tea_utm_cache_1300": "undefined",
+            "support_avif": "false",
+            "support_webp": "false",
+            "xiguavideopcwebid": "7134967546256016900",
+            "xiguavideopcwebid.sig": "xxRww5R1VEMJN_dQepHorEu_eAc",
+        }
+        response = requests.get(
+            url=url,
+            headers=headers,
+            params=params,
+            cookies=cookies,
+            proxies=tunnel_proxies(),
+            timeout=5,
+        )
+        if (
+            response.status_code != 200
+            or "data" not in response.json()
+            or response.json()["data"] == {}
+        ):
+            AliyunLogger.logging(
+                code="2000",
+                platform=self.platform,
+                mode=self.mode,
+                env=self.env,
+                message="获取视频信息失败",
+                trace_id=trace_id,
+            )
+            return None
+        else:
+            video_info = (
+                response.json()["data"]
+                .get("gidInformation", {})
+                .get("packerData", {})
+                .get("video", {})
+            )
+            if video_info == {}:
+                return None
+            video_detail = get_video_url(video_info)
+
+            video_dict = {
+                "video_title": video_info.get("title", ""),
+                "video_id": video_info.get("videoResource", {}).get("vid", ""),
+                "gid": str(item_id),
+                "play_cnt": int(video_info.get("video_watch_count", 0)),
+                "like_cnt": int(video_info.get("video_like_count", 0)),
+                "comment_cnt": int(get_comment_cnt(item_id)),
+                "share_cnt": 0,
+                "favorite_cnt": 0,
+                "duration": int(video_info.get("video_duration", 0)),
+                "video_width": int(video_detail["video_width"]),
+                "video_height": int(video_detail["video_height"]),
+                "publish_time_stamp": int(video_info.get("video_publish_time", 0)),
+                "publish_time_str": time.strftime(
+                    "%Y-%m-%d %H:%M:%S",
+                    time.localtime(int(video_info.get("video_publish_time", 0))),
+                ),
+                "user_name": video_info.get("user_info", {}).get("name", ""),
+                "user_id": str(video_info.get("user_info", {}).get("user_id", "")),
+                "avatar_url": str(
+                    video_info.get("user_info", {}).get("avatar_url", "")
+                ),
+                "cover_url": video_info.get("poster_url", ""),
+                "audio_url": video_detail["audio_url"],
+                "video_url": video_detail["video_url"],
+                "session": f"xigua-search-{int(time.time())}",
+            }
+            return video_dict
+
+
+if __name__ == "__main__":
+    user_list = [
+        {
+            "uid": 6267140,
+            "source": "xigua",
+            "link": "https://www.ixigua.com/home/2779177225827568",
+            "nick_name": "秋晴爱音乐",
+            "avatar_url": "",
+            "mode": "author",
+        },
+        {
+            "uid": 6267140,
+            "source": "xigua",
+            "link": "https://www.ixigua.com/home/2885546124776780",
+            "nick_name": "朗诵放歌的老山羊",
+            "avatar_url": "",
+            "mode": "author",
+        },
+        {
+            "uid": 6267140,
+            "source": "xigua",
+            "link": "https://www.ixigua.com/home/5880938217",
+            "nick_name": "天原声疗",
+            "avatar_url": "",
+            "mode": "author",
+        },
+    ]
+    # rule = {'period': {'min': 30, 'max': 30}, 'duration': {'min': 20, 'max': 0}, 'play_cnt': {'min': 100000, 'max': 0}}
+    # XGA = XiGuaAuthor(
+    #     platform="xigua",
+    #     mode="author",
+    #     rule_dict=rule,
+    #     env="prod",
+    #     user_list=user_list
+    # )
+    # XGA.get_author_list()
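To actually run the demo, the commented-out driver can be enabled; a minimal sketch using the same rule and `user_list`:

```python
rule = {
    "period": {"min": 30, "max": 30},
    "duration": {"min": 20, "max": 0},
    "play_cnt": {"min": 100000, "max": 0},
}
XiGuaAuthor(
    platform="xigua",
    mode="author",
    rule_dict=rule,
    env="prod",
    user_list=user_list,
).get_author_list()
```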

+ 1 - 132
xigua/xigua_author/xigua_author_scheduling.py

@@ -5,11 +5,9 @@ import base64
 import json
 import os
 import random
-import shutil
 import string
 import sys
 import time
-from hashlib import md5
 import requests
 import urllib3
 from requests.adapters import HTTPAdapter
@@ -18,8 +16,6 @@ sys.path.append(os.getcwd())
 from common.userAgent import get_random_user_agent
 from common.scheduling_db import MysqlHelper
 from common.common import Common
-from common.feishu import Feishu
-from common.publish import Publish
 from common.public import get_config_from_mysql, download_rule
 
 
@@ -622,12 +618,6 @@ class XiguaauthorScheduling:
                        Common.logger(log_type, crawler).info('Video already downloaded\n')
                        Common.logging(log_type, crawler, env, "Video already downloaded\n")
                     else:
-                        # cls.download_publish(log_type=log_type,
-                        #                      crawler=crawler,
-                        #                      user_dict=user_dict,
-                        #                      video_dict=video_dict,
-                        #                      rule_dict=rule_dict,
-                        #                      env=env)
                         video_dict["out_user_id"] = video_dict["user_id"]
                         video_dict["platform"] = crawler
                         video_dict["strategy"] = log_type
@@ -642,6 +632,7 @@ class XiguaauthorScheduling:
                 except Exception as e:
                    Common.logger(log_type, crawler).error(f"Exception while fetching a single video: {e}\n")
                    Common.logging(log_type, crawler, env, f"Exception while fetching a single video: {e}\n")
+
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, env):
         # sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
@@ -649,128 +640,6 @@ class XiguaauthorScheduling:
         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
         return len(repeat_video)
 
-    # Download / upload
-    @classmethod
-    def download_publish(cls, log_type, crawler, user_dict, video_dict, rule_dict, env):
-        # Download the video
-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video', title=video_dict['video_title'], url=video_dict['video_url'])
-        # Download the audio
-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio', title=video_dict['video_title'], url=video_dict['audio_url'])
-        # Mux the audio and video
-        Common.video_compose(log_type=log_type, crawler=crawler, video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
-        md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-        try:
-            if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
-                # Delete the video folder
-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                Common.logger(log_type, crawler).info("Video size is 0; deleted successfully\n")
-                Common.logging(log_type, crawler, env, "Video size is 0; deleted successfully\n")
-                return
-        except FileNotFoundError:
-            # Delete the video folder
-            shutil.rmtree(f"./{crawler}/videos/{md_title}")
-            Common.logger(log_type, crawler).info("Video file missing; folder deleted successfully\n")
-            Common.logging(log_type, crawler, env, "Video file missing; folder deleted successfully\n")
-            return
-        # Download the cover image
-        Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'], url=video_dict['cover_url'])
-        # Save video info to a txt file
-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-        # Upload the video
-        Common.logger(log_type, crawler).info("Start uploading the video...")
-        Common.logging(log_type, crawler, env, "Start uploading the video...")
-        if env == "dev":
-            oss_endpoint = "out"
-            our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                      crawler=crawler,
-                                                      strategy="定向抓取策略",
-                                                      our_uid=user_dict["uid"],
-                                                      env=env,
-                                                      oss_endpoint=oss_endpoint)
-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        else:
-            oss_endpoint = "inner"
-            our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                      crawler=crawler,
-                                                      strategy="定向抓取策略",
-                                                      our_uid=user_dict["uid"],
-                                                      env=env,
-                                                      oss_endpoint=oss_endpoint)
-
-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-
-        if our_video_id is None:
-            try:
-                # Delete the video folder
-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                return
-            except FileNotFoundError:
-                return
-
-        # Save video info to the database
-        insert_sql = f""" insert into crawler_video(video_id,
-                        user_id,
-                        out_user_id,
-                        platform,
-                        strategy,
-                        out_video_id,
-                        video_title,
-                        cover_url,
-                        video_url,
-                        duration,
-                        publish_time,
-                        play_cnt,
-                        crawler_rule,
-                        width,
-                        height)
-                        values({our_video_id},
-                        {user_dict["uid"]},
-                        "{video_dict['user_id']}",
-                        "{cls.platform}",
-                        "定向爬虫策略",
-                        "{video_dict['video_id']}",
-                        "{video_dict['video_title']}",
-                        "{video_dict['cover_url']}",
-                        "{video_dict['video_url']}",
-                        {int(video_dict['duration'])},
-                        "{video_dict['publish_time_str']}",
-                        {int(video_dict['play_cnt'])},
-                        '{json.dumps(rule_dict)}',
-                        {int(video_dict['video_width'])},
-                        {int(video_dict['video_height'])}) """
-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
-        MysqlHelper.update_values(log_type, crawler, insert_sql, env)
-        Common.logger(log_type, crawler).info('Video info written to the database')
-        Common.logging(log_type, crawler, env, 'Video info written to the database')
-
-        # Write the video row to Feishu
-        Feishu.insert_columns(log_type, crawler, "e075e9", "ROWS", 1, 2)
-        upload_time = int(time.time())
-        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                   "定向榜",
-                   video_dict['video_title'],
-                   str(video_dict['video_id']),
-                   our_video_link,
-                   video_dict['gid'],
-                   video_dict['play_cnt'],
-                   video_dict['comment_cnt'],
-                   video_dict['like_cnt'],
-                   video_dict['share_cnt'],
-                   video_dict['duration'],
-                   str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
-                   video_dict['publish_time_str'],
-                   video_dict['user_name'],
-                   video_dict['user_id'],
-                   video_dict['avatar_url'],
-                   video_dict['cover_url'],
-                   video_dict['video_url'],
-                   video_dict['audio_url']]]
-        time.sleep(0.5)
-        Feishu.update_values(log_type, crawler, "e075e9", "F2:Z2", values)
-        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
-        Common.logging(log_type, crawler, env, f"视频已保存至云文档\n")
 
     @classmethod
     def get_author_videos(cls, log_type, crawler, user_list, rule_dict, env):

+ 111 - 71
xigua/xigua_main/run_xg_author.py

@@ -1,15 +1,13 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/6/7
 import argparse
 from mq_http_sdk.mq_client import *
 from mq_http_sdk.mq_consumer import *
 from mq_http_sdk.mq_exception import MQExceptionBase
+
 sys.path.append(os.getcwd())
-from common.common import Common
 from common.public import get_consumer, ack_message, task_fun_mq
 from common.scheduling_db import MysqlHelper
-from xigua.xigua_author.xigua_author_scheduling import XiguaauthorScheduling
+from xigua.xigua_author import XiGuaAuthor
+from common.aliyun_log import AliyunLogger
 
 
 def main(log_type, crawler, topic_name, group_id, env):
@@ -19,92 +17,134 @@ def main(log_type, crawler, topic_name, group_id, env):
     wait_seconds = 30
     # Consume at most 3 messages per call (can be set up to 16).
     batch = 1
-    Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
-                                          f'WaitSeconds:{wait_seconds}\n'
-                                          f'TopicName:{topic_name}\n'
-                                          f'MQConsumer:{group_id}')
-    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
-                                           f'WaitSeconds:{wait_seconds}\n'
-                                           f'TopicName:{topic_name}\n'
-                                           f'MQConsumer:{group_id}')
+    AliyunLogger.logging(
+        code="1000",
+        platform=crawler,
+        mode=log_type,
+        env=env,
+        message=f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+        f"WaitSeconds:{wait_seconds}\n"
+        f"TopicName:{topic_name}\n"
+        f"MQConsumer:{group_id}",
+    )
     while True:
         try:
             # Long-poll for messages.
             recv_msgs = consumer.consume_message(batch, wait_seconds)
             for msg in recv_msgs:
                 xg_author_start_time = int(time.time())
-                Common.logger(log_type, crawler).info(f"Receive\n"
-                                                      f"MessageId:{msg.message_id}\n"
-                                                      f"MessageBodyMD5:{msg.message_body_md5}\n"
-                                                      f"MessageTag:{msg.message_tag}\n"
-                                                      f"ConsumedTimes:{msg.consumed_times}\n"
-                                                      f"PublishTime:{msg.publish_time}\n"
-                                                      f"Body:{msg.message_body}\n"
-                                                      f"NextConsumeTime:{msg.next_consume_time}\n"
-                                                      f"ReceiptHandle:{msg.receipt_handle}\n"
-                                                      f"Properties:{msg.properties}")
-                Common.logging(log_type, crawler, env, f"Receive\n"
-                                                       f"MessageId:{msg.message_id}\n"
-                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
-                                                       f"MessageTag:{msg.message_tag}\n"
-                                                       f"ConsumedTimes:{msg.consumed_times}\n"
-                                                       f"PublishTime:{msg.publish_time}\n"
-                                                       f"Body:{msg.message_body}\n"
-                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
-                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
-                                                       f"Properties:{msg.properties}")
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"Receive\n"
+                    f"MessageId:{msg.message_id}\n"
+                    f"MessageBodyMD5:{msg.message_body_md5}\n"
+                    f"MessageTag:{msg.message_tag}\n"
+                    f"ConsumedTimes:{msg.consumed_times}\n"
+                    f"PublishTime:{msg.publish_time}\n"
+                    f"Body:{msg.message_body}\n"
+                    f"NextConsumeTime:{msg.next_consume_time}\n"
+                    f"ReceiptHandle:{msg.receipt_handle}\n"
+                    f"Properties:{msg.properties}",
+                )
                 # ack_mq_message
-                ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
+                ack_message(
+                    log_type=log_type,
+                    crawler=crawler,
+                    recv_msgs=recv_msgs,
+                    consumer=consumer,
+                )
 
                 # Handle the crawler task
-                task_dict = task_fun_mq(msg.message_body)['task_dict']
-                rule_dict = task_fun_mq(msg.message_body)['rule_dict']
-                task_id = task_dict['id']
-                select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
-                user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
-                Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
-                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
-                Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
-                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
-                # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
-                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
-                XiguaauthorScheduling.get_author_videos(log_type=log_type,
-                                                        crawler=crawler,
-                                                        rule_dict=rule_dict,
-                                                        user_list=user_list,
-                                                        env=env)
-                Common.del_logs(log_type, crawler)
-                Common.logger(log_type, crawler).info('抓取一轮结束\n')
-                Common.logging(log_type, crawler, env, '抓取一轮结束\n')
+                task_dict = task_fun_mq(msg.message_body)["task_dict"]
+                rule_dict = task_fun_mq(msg.message_body)["rule_dict"]
+                task_id = task_dict["id"]
+                select_user_sql = (
+                    f"""select * from crawler_user_v3 where task_id={task_id}"""
+                )
+                user_list = MysqlHelper.get_values(
+                    log_type, crawler, select_user_sql, env, action=""
+                )
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"调度任务:{task_dict}",
+                )
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"抓取规则:{rule_dict}",
+                )
+                AliyunLogger.logging(
+                    code="1003",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f'Start crawling: {task_dict["taskName"]}\n',
+                )
+                XiGuaAuthor(
+                    mode=log_type,
+                    platform=crawler,
+                    rule_dict=rule_dict,
+                    user_list=user_list,
+                    env=env,
+                ).get_author_list()
                 xg_author_end_time = int(time.time())
+                AliyunLogger.logging(
+                    code="1004",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f'Finished crawling: {task_dict["taskName"]}\n',
+                )
                 xg_author_duration = xg_author_end_time - xg_author_start_time
-                Common.logger(log_type, crawler).info(f"duration {xg_author_duration}")
-                Common.logging(log_type, crawler, env, f"duration {xg_author_duration}")
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"duration {xg_author_duration}",
+                )
 
         except MQExceptionBase as err:
             # No messages to consume in this topic.
             if err.type == "MessageNotExist":
-                Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
-                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"No new message! RequestId:{err.req_id}\n",
+                )
                 continue
-
-            Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
-            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
+            AliyunLogger.logging(
+                code="1000",
+                platform=crawler,
+                mode=log_type,
+                env=env,
+                message=f"Consume Message Fail! Exception:{err}\n",
+            )
             time.sleep(2)
             continue
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()  ## create the argument parser
-    parser.add_argument('--log_type', type=str)  ## add an argument, specifying its type
-    parser.add_argument('--crawler')  ## add an argument
-    parser.add_argument('--topic_name')  ## add an argument
-    parser.add_argument('--group_id')  ## add an argument
-    parser.add_argument('--env')  ## add an argument
+    parser.add_argument("--log_type", type=str)  ## add an argument, specifying its type
+    parser.add_argument("--crawler")  ## add an argument
+    parser.add_argument("--topic_name")  ## add an argument
+    parser.add_argument("--group_id")  ## add an argument
+    parser.add_argument("--env")  ## add an argument
     args = parser.parse_args()  ### parse the args (they can also be supplied from the terminal)
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         topic_name=args.topic_name,
-         group_id=args.group_id,
-         env=args.env)
+    main(
+        log_type=args.log_type,
+        crawler=args.crawler,
+        topic_name=args.topic_name,
+        group_id=args.group_id,
+        env=args.env,
+    )