Selaa lähdekoodia

若 kimi 失败,则返回报错

罗俊辉 11 kuukautta sitten
vanhempi
commit
12fccecfca

+ 1 - 1
app.py

@@ -2,7 +2,7 @@
 @author: luojunhui
 """
 from quart import Quart
-from applications.log import logging
+from applications.functions.log import logging
 from applications.routes import my_blueprint
 
 # 初始化 App

+ 0 - 43
applications/functions/ask_kimi.py

@@ -1,43 +0,0 @@
-"""
-@author: luojunhui
-"""
-import json
-from openai import OpenAI
-
-
-def ask_kimi(question):
-    """
-    Ask Kimi for information
-    :param question: tiny text
-    :return: "{}"
-    """
-    single_title_prompt = """
-        我会给你一个视频标题,需要你帮我用你所学的知识来帮我分析出以下信息,信息我都写到 json 里面了
-        {
-            "key_words": [],  # 返回三个关键词
-            "search_keys": [], # 标题可能的搜索关键词,返回 3 个
-            "extra_keys": [], # 关心这个视频的用户还会关心哪些关键词, 返回 3 个
-            "theme": 标题的主题, 用一个词概括
-        }
-        只需要返回一个 json,key 和上面的一样,
-        我给你的标题是: 
-        """
-    client = OpenAI(
-        api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
-        base_url="https://api.moonshot.cn/v1"
-    )
-    chat_completion = client.chat.completions.create(
-        messages=[
-            {
-                "role": "user",
-                "content": single_title_prompt + question,
-            }
-        ],
-        model="moonshot-v1-8k",
-    )
-    response = chat_completion.choices[0].message.content.replace('```json', '').replace('```', '')
-    try:
-        response = json.loads(response)
-        return response
-    except:
-        return {}

+ 0 - 54
applications/functions/auto_white.py

@@ -1,54 +0,0 @@
-"""
-@author: luojunhui
-"""
-import json
-import requests
-
-
-def get_cookie():
-    """
-    获取 cookie
-    :return:
-    """
-    url = "https://admin.piaoquantv.com/manager/login?account=luojunhui&passWd=e10adc3949ba59abbe56e057f20f883e&muid=7"
-    payload = {}
-    headers = {
-        'accept': 'application/json, text/plain, */*',
-        'accept-language': 'en',
-        'priority': 'u=1, i',
-        'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
-        'sec-ch-ua-mobile': '?0',
-        'sec-ch-ua-platform': '"macOS"',
-        'sec-fetch-dest': 'empty',
-        'sec-fetch-mode': 'cors',
-        'sec-fetch-site': 'same-origin',
-        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
-    }
-    response = requests.request("GET", url, headers=headers, data=payload)
-    return response.cookies.values()[0]
-
-
-def auto_white(root_share_id):
-    """
-    自动加入白名单, 保证公众号百分百出广告
-    :param root_share_id:
-    :return:
-    """
-    url = "https://admin.piaoquantv.com/manager/ad/own/white/rootShare/save"
-    dd = {
-        "rootShareId": root_share_id,
-        "commit": "算法自动加入白名单--"
-    }
-    payload = json.dumps(dd)
-    cookie = get_cookie()
-    headers = {
-        'accept': 'application/json',
-        'accept-language': 'en',
-        'content-type': 'application/json;',
-        'cookie': "SESSION=" + cookie,
-        'origin': 'https://admin.piaoquantv.com',
-        'priority': 'u=1, i',
-        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
-    }
-    response = requests.request("POST", url, headers=headers, data=payload)
-    return response.json()['content']

+ 300 - 140
applications/functions/common.py

@@ -5,165 +5,325 @@ import os
 import json
 import uuid
 import requests
+import pymysql
 import urllib.parse
+from openai import OpenAI
 
-from applications.functions.auto_white import auto_white
-from applications.functions.mysql import select, select_sensitive_words
-from applications.functions.ask_kimi import ask_kimi
-from applications.log import logging
+from applications.functions.log import logging
 
 
-def sensitive_flag(title):
+class Functions(object):
     """
-    判断标题是否命中过滤词
-    :param title:
-    :return:
+    通用工具代码
     """
-    sensitive_words = select_sensitive_words()
-    for word in sensitive_words:
-        if word in title:
-            # title = title.replace(word, "*")
-            return False
-    return True
 
+    # 敏感词逻辑
+    @classmethod
+    def sensitive_flag(cls, title):
+        """
+        判断标题是否命中过滤词
+        :param title:
+        :return:
+        """
+        sensitive_words = MySQLServer().select_sensitive_words()
+        for word in sensitive_words:
+            if word in title:
+                # title = title.replace(word, "*")
+                return False
+        return True
 
-def ask_kimi_and_save_to_local(info_tuple):
-    """
-    save file to local
-    :return:
-    """
-    title, trace_id, save_path = info_tuple[0], info_tuple[1], info_tuple[2]
-    if os.path.exists(save_path):
-        logging(
-            code="2001",
-            info="该 video 信息已经挖掘完成---{}".format(title),
-            function="ask_kimi_and_save_to_local",
-            trace_id=trace_id,
-        )
-    else:
-        os.makedirs(os.path.dirname(save_path), exist_ok=True)
-        if not title:
-            result = {}
-        else:
-            result = ask_kimi(title)
-        logging(
-            code="2001",
-            info="kimi-result",
-            data=result,
-            trace_id=trace_id,
-            function="ask_kimi_and_save_to_local"
+    # 自动加入白名单逻辑
+    @classmethod
+    def auto_white(cls, root_share_id):
+        """
+        自动加入白名单, 保证公众号百分百出广告
+        :param root_share_id:
+        :return:
+        """
+
+        def get_cookie():
+            """
+            获取 cookie
+            :return:
+            """
+            url = "https://admin.piaoquantv.com/manager/login?account=luojunhui&passWd=e10adc3949ba59abbe56e057f20f883e&muid=7"
+            payload = {}
+            headers = {
+                'accept': 'application/json, text/plain, */*',
+                'accept-language': 'en',
+                'priority': 'u=1, i',
+                'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"macOS"',
+                'sec-fetch-dest': 'empty',
+                'sec-fetch-mode': 'cors',
+                'sec-fetch-site': 'same-origin',
+                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
+            }
+            response = requests.request("GET", url, headers=headers, data=payload)
+            return response.cookies.values()[0]
+
+        url = "https://admin.piaoquantv.com/manager/ad/own/white/rootShare/save"
+        dd = {
+            "rootShareId": root_share_id,
+            "commit": "算法自动加入白名单--"
+        }
+        payload = json.dumps(dd)
+        cookie = get_cookie()
+        headers = {
+            'accept': 'application/json',
+            'accept-language': 'en',
+            'content-type': 'application/json;',
+            'cookie': "SESSION=" + cookie,
+            'origin': 'https://admin.piaoquantv.com',
+            'priority': 'u=1, i',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
+        }
+        response = requests.request("POST", url, headers=headers, data=payload)
+        return response.json()['content']
+
+    # 创建公众号分享卡片
+    @classmethod
+    def create_gzh_path(cls, video_id, shared_uid):
+        """
+        :param video_id: 视频 id
+        :param shared_uid: 分享 id
+        """
+        root_share_id = str(uuid.uuid4())
+        url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
+        # 自动把 root_share_id 加入到白名单
+        cls.auto_white(root_share_id)
+        return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url, safe='')}"
+
+    # 从票圈请求视频
+    @classmethod
+    def request_for_info(cls, video_id):
+        """
+        请求数据
+        :param video_id:
+        :return:
+        """
+        url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
+        data = {
+            "videoIdList": [video_id]
+        }
+        header = {
+            "Content-Type": "application/json",
+        }
+        response = requests.post(url, headers=header, data=json.dumps(data))
+        return response.json()
+
+    # 清理标题
+    @classmethod
+    def clean_title(cls, strings):
+        """
+        :param strings:
+        :return:
+        """
+        return (
+            strings.strip()
+            .replace("\n", "")
+            .replace("/", "")
+            .replace("\r", "")
+            .replace("#", "")
+            .replace(".", "。")
+            .replace("\\", "")
+            .replace("&NBSP", "")
+            .replace(":", "")
+            .replace("*", "")
+            .replace("?", "")
+            .replace("?", "")
+            .replace('"', "")
+            .replace("<", "")
+            .replace(">", "")
+            .replace("|", "")
+            .replace(" ", "")
+            .replace('"', "")
+            .replace("'", "")
         )
-        with open(save_path, "w", encoding="utf-8") as f:
-            f.write(json.dumps(result, ensure_ascii=False))
 
 
-def create_gzh_path(video_id, shared_uid):
+class MySQLServer(object):
     """
-    :param video_id: 视频 id
-    :param shared_uid: 分享 id
+    MySql 服务
     """
-    root_share_id = str(uuid.uuid4())
-    url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
-    # 自动把 root_share_id 加入到白名单
-    auto_white(root_share_id)
-    return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url, safe='')}"
+    @classmethod
+    def select_download_videos(cls, trace_id):
+        """
+        查询
+        :param trace_id:
+        :return:
+        """
+        sql = "select video_id, video_title from crawler_video where out_user_id = '{}' limit 5;".format(trace_id)
+        connection = pymysql.connect(
+            host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
+            port=3306,  # 端口号
+            user="crawler",  # mysql用户名
+            passwd="crawler123456@",  # mysql用户登录密码
+            db="piaoquan-crawler",  # 数据库名
+            charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
+        )
+        cursor = connection.cursor()
+        cursor.execute(sql)
+        out_video_list = cursor.fetchall()
+        if len(out_video_list) > 0:
+            vid_list = [i[0] for i in out_video_list if i[0] != 0]
+            vid_list = [vid_list[0]]
+            # dir_path = os.path.join(os.getcwd(), 'applications', 'static', "out_videos")
+            # os.makedirs(os.path.dirname(dir_path), exist_ok=True)
+            # done_list = os.listdir(dir_path)
+            # process_list = [
+            #     (
+            #         i[1],
+            #         trace_id,
+            #         os.path.join(dir_path, "{}.json".format(i[0]))
+            #     ) for i in out_video_list if not "{}.json".format(i[0]) in done_list
+            # ]
+            # if process_list:
+            #     ask_kimi_and_save_to_local(process_list[0])
+            logging(
+                code="2003",
+                trace_id=trace_id,
+                info="recall_search_list",
+                function="find_videos_in_mysql",
+                data=vid_list
+            )
+            return {
+                "search_videos": "success",
+                "trace_id": trace_id,
+                "video_list": vid_list
+            }
+        else:
+            return {
+                "search_videos": "failed",
+                "trace_id": trace_id,
+                "video_list": []
+            }
 
+    @classmethod
+    def select_pq_videos(cls):
+        """
+        查询
+        :return: info_list
+        """
+        connection = pymysql.connect(
+            host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
+            port=3306,  # 端口号
+            user="wx2016_longvideo",  # mysql用户名
+            passwd="wx2016_longvideoP@assword1234",  # mysql用户登录密码
+            db="incentive",  # 数据库名
+            charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
+        )
+        sql = "select video_id, key_words, search_keys, extra_keys from video_content"
+        cursor = connection.cursor()
+        cursor.execute(sql)
+        data = cursor.fetchall()
+        result = [
+            {
+                "video_id": line[0],
+                "key_words": json.loads(line[1]),
+                "search_keys": json.loads(line[2]),
+                "extra_keys": json.loads(line[3]),
+            }
+            for line in data
+        ]
+        return result
 
-def request_for_info(video_id):
-    """
-    请求数据
-    :param video_id:
-    :return:
-    """
-    url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
-    data = {
-        "videoIdList": [video_id]
-    }
-    header = {
-        "Content-Type": "application/json",
-    }
-    response = requests.post(url, headers=header, data=json.dumps(data))
-    return response.json()
-
-
-def choose_video(result):
-    """
-    :param result: 计算出来的结果
-    :return: uid, video_id
-    """
-    score1 = result['s1_score']
-    if score1 > 0:
-        return result['s1_uid'], result['s1_vid']
-    else:
-        return None, None
+    # 敏感词
+    @classmethod
+    def select_sensitive_words(cls):
+        """
+        sensitive words
+        :return:
+        """
+        connection = pymysql.connect(
+            host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
+            port=3306,  # 端口号
+            user="wx2016_longvideo",  # mysql用户名
+            passwd="wx2016_longvideoP@assword1234",  # mysql用户登录密码
+            db="longvideo",  # 数据库名
+            charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
+        )
+        sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
+        cursor = connection.cursor()
+        cursor.execute(sql)
+        data = cursor.fetchall()
+        result = [line[0] for line in data]
+        return result
 
 
-def find_videos_in_mysql(trace_id):
+class KimiServer(object):
     """
-    通过 trace_id去 pq_spider_mysql 搜索视频
-    :param trace_id:
-    :return:
+    Kimi Server
     """
-    sql = "select video_id, video_title from crawler_video where out_user_id = '{}' limit 5;".format(trace_id)
-    out_video_list = select(sql=sql)
-    if len(out_video_list) > 0:
-        vid_list = [i[0] for i in out_video_list if i[0] != 0]
-        vid_list = [vid_list[0]]
-        # dir_path = os.path.join(os.getcwd(), 'applications', 'static', "out_videos")
-        # os.makedirs(os.path.dirname(dir_path), exist_ok=True)
-        # done_list = os.listdir(dir_path)
-        # process_list = [
-        #     (
-        #         i[1],
-        #         trace_id,
-        #         os.path.join(dir_path, "{}.json".format(i[0]))
-        #     ) for i in out_video_list if not "{}.json".format(i[0]) in done_list
-        # ]
-        # if process_list:
-        #     ask_kimi_and_save_to_local(process_list[0])
-        logging(
-            code="2003",
-            trace_id=trace_id,
-            info="recall_search_list",
-            function="find_videos_in_mysql",
-            data=vid_list
+    @classmethod
+    def ask_kimi(cls, question):
+        """
+        Ask Kimi for information
+        :param question: tiny text
+        :return: "{}"
+        """
+        single_title_prompt = """
+            我会给你一个视频标题,需要你帮我用你所学的知识来帮我分析出以下信息,信息我都写到 json 里面了
+            {
+                "key_words": [],  # 返回三个关键词
+                "search_keys": [], # 标题可能的搜索关键词,返回 3 个
+                "extra_keys": [], # 关心这个视频的用户还会关心哪些关键词, 返回 3 个
+                "theme": 标题的主题, 用一个词概括
+            }
+            只需要返回一个 json,key 和上面的一样,
+            我给你的标题是: 
+            """
+        client = OpenAI(
+            api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
+            base_url="https://api.moonshot.cn/v1"
         )
-        return {
-            "search_videos": "success",
-            "trace_id": trace_id,
-            "video_list": vid_list
-        }
-    else:
-        return {
-            "search_videos": "failed",
-            "trace_id": trace_id,
-            "video_list": []
-        }
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": single_title_prompt + question,
+                }
+            ],
+            model="moonshot-v1-8k",
+        )
+        response = chat_completion.choices[0].message.content.replace('```json', '').replace('```', '')
+        try:
+            response = json.loads(response)
+            return response
+        except:
+            return {}
+
+    @classmethod
+    def ask_kimi_and_save_to_local(cls, info_tuple):
+        """
+        save file to local
+        :return:
+        """
+        title, trace_id, save_path = info_tuple[0], info_tuple[1], info_tuple[2]
+        if os.path.exists(save_path):
+            logging(
+                code="2001",
+                info="该 video 信息已经挖掘完成---{}".format(title),
+                function="ask_kimi_and_save_to_local",
+                trace_id=trace_id,
+            )
+        else:
+            os.makedirs(os.path.dirname(save_path), exist_ok=True)
+            if not title:
+                result = {}
+            else:
+                result =  cls.ask_kimi(title)
+            logging(
+                code="2001",
+                info="kimi-result",
+                data=result,
+                trace_id=trace_id,
+                function="ask_kimi_and_save_to_local"
+            )
+            with open(save_path, "w", encoding="utf-8") as f:
+                f.write(json.dumps(result, ensure_ascii=False))
+
+
 
 
-def clean_title(strings):
-    """
-    :param strings:
-    :return:
-    """
-    return (
-        strings.strip()
-        .replace("\n", "")
-        .replace("/", "")
-        .replace("\r", "")
-        .replace("#", "")
-        .replace(".", "。")
-        .replace("\\", "")
-        .replace("&NBSP", "")
-        .replace(":", "")
-        .replace("*", "")
-        .replace("?", "")
-        .replace("?", "")
-        .replace('"', "")
-        .replace("<", "")
-        .replace(">", "")
-        .replace("|", "")
-        .replace(" ", "")
-        .replace('"', "")
-        .replace("'", "")
-    )

+ 0 - 33
applications/functions/date.py

@@ -1,33 +0,0 @@
-"""
-@author: luojunhui
-"""
-from datetime import datetime, timedelta
-
-
-def five_days_before(ori_dt):
-    """
-    Generate date in 3 days
-    :param ori_dt:
-    :return:
-    """
-    now_date = datetime.strptime(ori_dt, "%Y%m%d")
-    seven_before = now_date - timedelta(days=5)
-    return seven_before.strftime("%Y%m%d")
-
-
-def generate_daily_strings(start_date, end_date):
-    """
-    Generate daily date_str
-    :param start_date:
-    :param end_date:
-    :return:
-    """
-    start = datetime.strptime(start_date, "%Y%m%d")
-    end = datetime.strptime(end_date, "%Y%m%d")
-    current = start
-    date_strings = []
-    while current <= end:
-        date_strings.append(current.strftime("%Y%m%d"))
-        current += timedelta(days=1)
-    return date_strings
-

+ 2 - 2
applications/functions/item.py

@@ -3,7 +3,7 @@
 """
 import time
 
-from applications.functions.common import clean_title
+from applications.functions.common import Functions
 
 
 class VideoItem(object):
@@ -29,7 +29,7 @@ class VideoItem(object):
         3. 需要后出理的字段: video_title, publish_time
         """
         if self.item.get("video_title"):
-            self.item["video_title"] = clean_title(self.item["video_title"])
+            self.item["video_title"] = Functions().clean_title(self.item["video_title"])
         else:
             return False
         if self.item.get("publish_time_stamp"):

+ 0 - 0
applications/log.py → applications/functions/log.py


+ 0 - 0
applications/mq.py → applications/functions/mq.py


+ 0 - 77
applications/functions/mysql.py

@@ -1,77 +0,0 @@
-"""
-@author: luojunhui
-mysql 方法
-"""
-import json
-
-import pymysql
-
-
-def select(sql):
-    """
-    查询
-    :param sql:
-    :return:
-    """
-    connection = pymysql.connect(
-        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
-        port=3306,  # 端口号
-        user="crawler",  # mysql用户名
-        passwd="crawler123456@",  # mysql用户登录密码
-        db="piaoquan-crawler",  # 数据库名
-        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
-    )
-    cursor = connection.cursor()
-    cursor.execute(sql)
-    data = cursor.fetchall()
-    return data
-
-
-def select_pq_videos():
-    """
-    查询
-    :return: info_list
-    """
-    connection = pymysql.connect(
-        host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
-        port=3306,  # 端口号
-        user="wx2016_longvideo",  # mysql用户名
-        passwd="wx2016_longvideoP@assword1234",  # mysql用户登录密码
-        db="incentive",  # 数据库名
-        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
-    )
-    sql = "select video_id, key_words, search_keys, extra_keys from video_content"
-    cursor = connection.cursor()
-    cursor.execute(sql)
-    data = cursor.fetchall()
-    result = [
-        {
-            "video_id": line[0],
-            "key_words": json.loads(line[1]),
-            "search_keys": json.loads(line[2]),
-            "extra_keys": json.loads(line[3]),
-        }
-        for line in data
-    ]
-    return result
-
-
-def select_sensitive_words():
-    """
-    sensitive words
-    :return:
-    """
-    connection = pymysql.connect(
-        host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
-        port=3306,  # 端口号
-        user="wx2016_longvideo",  # mysql用户名
-        passwd="wx2016_longvideoP@assword1234",  # mysql用户登录密码
-        db="longvideo",  # 数据库名
-        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
-    )
-    sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
-    cursor = connection.cursor()
-    cursor.execute(sql)
-    data = cursor.fetchall()
-    result = [line[0] for line in data]
-    return result

+ 1 - 1
applications/match_alg/rank.py

@@ -2,7 +2,7 @@
 @author: luojunhui
 """
 from applications.match_alg.recall import recall_videos
-from applications.log import logging
+from applications.functions.log import logging
 
 
 def jac_score(d1, d2):

+ 3 - 305
applications/match_alg/recall.py

@@ -1,311 +1,9 @@
 """
 @author: luojunhui
 """
-import os
-import json
 
-from applications.log import logging
-from applications.functions.mysql import select_pq_videos
-
-
-gh_id_dict = {
-    "gh_01f8afd03366": {
-        "uid": 69637520,
-        "nick_name": "非亲非故"
-    },
-    "gh_058e41145a0c": {
-        "uid": 69637476,
-        "nick_name": "甜腻梦话"
-    },
-    "gh_084a485e859a": {
-        "uid": 69637472,
-        "nick_name": "梦星月"
-    },
-    "gh_0921c03402cd": {
-        "uid": 69637531,
-        "nick_name": "你的女友"
-    },
-    "gh_0c89e11f8bf3": {
-        "uid": 69637508,
-        "nick_name": "粟米"
-    },
-    "gh_171cec079b2a": {
-        "uid": 69637501,
-        "nick_name": "海上"
-    },
-    "gh_183d80deffb8": {
-        "uid": 69637491,
-        "nick_name": "论趣"
-    },
-    "gh_1ee2e1b39ccf": {
-        "uid": 69637473,
-        "nick_name": "纵有疾风起"
-    },
-    "gh_234ef02cdee5": {
-        "uid": 69637513,
-        "nick_name": "夹逼"
-    },
-    "gh_26a307578776": {
-        "uid": 69637490,
-        "nick_name": "最宝贝的宝贝"
-    },
-    "gh_29074b51f2b7": {
-        "uid": 69637530,
-        "nick_name": "沉舸"
-    },
-    "gh_2b8c6aa035ae": {
-        "uid": 69637470,
-        "nick_name": "懶得取名"
-    },
-    "gh_34318194fd0e": {
-        "uid": 69637517,
-        "nick_name": "徒四壁"
-    },
-    "gh_3845af6945d0": {
-        "uid": 69637545,
-        "nick_name": "秋水娉婷"
-    },
-    "gh_3ac6d7208961": {
-        "uid": 69637497,
-        "nick_name": "小熊的少女梦"
-    },
-    "gh_3c7d38636846": {
-        "uid": 69637519,
-        "nick_name": "油腻腻"
-    },
-    "gh_3df10391639c": {
-        "uid": 69637541,
-        "nick_name": "六郎娇面"
-    },
-    "gh_40a0ad154478": {
-        "uid": 69637516,
-        "nick_name": "禁止"
-    },
-    "gh_424c8eeabced": {
-        "uid": 69637522,
-        "nick_name": "认命"
-    },
-    "gh_4568b5a7e2fe": {
-        "uid": 69637482,
-        "nick_name": "香腮"
-    },
-    "gh_45beb952dc74": {
-        "uid": 69637488,
-        "nick_name": "毋庸"
-    },
-    "gh_484de412b0ef": {
-        "uid": 69637481,
-        "nick_name": "婪"
-    },
-    "gh_4c058673c07e": {
-        "uid": 69637474,
-        "nick_name": "影帝"
-    },
-    "gh_538f78f9d3aa": {
-        "uid": 69637478,
-        "nick_name": "伤痕"
-    },
-    "gh_56a6765df869": {
-        "uid": 69637514,
-        "nick_name": "风月"
-    },
-    "gh_56ca3dae948c": {
-        "uid": 69637538,
-        "nick_name": "留下太多回忆"
-    },
-    "gh_5e543853d8f0": {
-        "uid": 69637543,
-        "nick_name": "不知春秋"
-    },
-    "gh_5ff48e9fb9ef": {
-        "uid": 69637494,
-        "nick_name": "寻她找他"
-    },
-    "gh_671f460c856c": {
-        "uid": 69637523,
-        "nick_name": "绝不改悔"
-    },
-    "gh_6b7c2a257263": {
-        "uid": 69637528,
-        "nick_name": "奶牙"
-    },
-    "gh_6d205db62f04": {
-        "uid": 69637509,
-        "nick_name": "怕羞"
-    },
-    "gh_6d9f36e3a7be": {
-        "uid": 69637498,
-        "nick_name": "望长安"
-    },
-    "gh_73be0287bb94": {
-        "uid": 69637537,
-        "nick_name": "戏剧"
-    },
-    "gh_744cb16f6e16": {
-        "uid": 69637505,
-        "nick_name": "反駁"
-    },
-    "gh_7b4a5f86d68c": {
-        "uid": 69637477,
-        "nick_name": "我很想你"
-    },
-    "gh_7bca1c99aea0": {
-        "uid": 69637511,
-        "nick_name": "从小就很傲"
-    },
-    "gh_7e5818b2dd83": {
-        "uid": 69637532,
-        "nick_name": "二八佳人"
-    },
-    "gh_89ef4798d3ea": {
-        "uid": 69637533,
-        "nick_name": "彼岸花"
-    },
-    "gh_901b0d722749": {
-        "uid": 69637518,
-        "nick_name": "深情不为我"
-    },
-    "gh_9161517e5676": {
-        "uid": 69637495,
-        "nick_name": "折磨"
-    },
-    "gh_93e00e187787": {
-        "uid": 69637504,
-        "nick_name": "理会"
-    },
-    "gh_9877c8541764": {
-        "uid": 69637506,
-        "nick_name": "我沿着悲伤"
-    },
-    "gh_9cf3b7ff486b": {
-        "uid": 69637492,
-        "nick_name": "hoit"
-    },
-    "gh_9e559b3b94ca": {
-        "uid": 69637471,
-        "nick_name": "我与你相遇"
-    },
-    "gh_9f8dc5b0c74e": {
-        "uid": 69637496,
-        "nick_name": "港口"
-    },
-    "gh_a182cfc94dad": {
-        "uid": 69637539,
-        "nick_name": "四海八荒"
-    },
-    "gh_a2901d34f75b": {
-        "uid": 69637535,
-        "nick_name": "听腻了谎话"
-    },
-    "gh_a307072c04b9": {
-        "uid": 69637521,
-        "nick_name": "踏步"
-    },
-    "gh_a6351b447819": {
-        "uid": 69637540,
-        "nick_name": "七猫酒馆"
-    },
-    "gh_ac43e43b253b": {
-        "uid": 69637499,
-        "nick_name": "一厢情愿"
-    },
-    "gh_adca24a8f429": {
-        "uid": 69637483,
-        "nick_name": "对你何止一句喜欢"
-    },
-    "gh_b15de7c99912": {
-        "uid": 69637536,
-        "nick_name": "糖炒板栗"
-    },
-    "gh_b32125c73861": {
-        "uid": 69637493,
-        "nick_name": "发尾"
-    },
-    "gh_b3ffc1ca3a04": {
-        "uid": 69637546,
-        "nick_name": "主宰你心"
-    },
-    "gh_b8baac4296cb": {
-        "uid": 69637489,
-        "nick_name": "生性"
-    },
-    "gh_b9b99173ff8a": {
-        "uid": 69637524,
-        "nick_name": "养一只月亮"
-    },
-    "gh_bd57b6978e06": {
-        "uid": 69637527,
-        "nick_name": "厌遇"
-    },
-    "gh_be8c29139989": {
-        "uid": 69637502,
-        "nick_name": "不负"
-    },
-    "gh_bfe5b705324a": {
-        "uid": 69637529,
-        "nick_name": "乐极"
-    },
-    "gh_bff0bcb0694a": {
-        "uid": 69637534,
-        "nick_name": "简迷离"
-    },
-    "gh_c69776baf2cd": {
-        "uid": 69637512,
-        "nick_name": "骄纵"
-    },
-    "gh_c91b42649690": {
-        "uid": 69637503,
-        "nick_name": "荟萃"
-    },
-    "gh_d2cc901deca7": {
-        "uid": 69637487,
-        "nick_name": "恶意调笑"
-    },
-    "gh_d5f935d0d1f2": {
-        "uid": 69637500,
-        "nick_name": "青少年哪吒"
-    },
-    "gh_da76772d8d15": {
-        "uid": 69637526,
-        "nick_name": "独揽风月"
-    },
-    "gh_de9f9ebc976b": {
-        "uid": 69637475,
-        "nick_name": "剑出鞘恩怨了"
-    },
-    "gh_e0eb490115f5": {
-        "uid": 69637486,
-        "nick_name": "赋别"
-    },
-    "gh_e24da99dc899": {
-        "uid": 69637484,
-        "nick_name": "恋雨夏季"
-    },
-    "gh_e2576b7181c6": {
-        "uid": 69637515,
-        "nick_name": "满天星"
-    },
-    "gh_e75dbdc73d80": {
-        "uid": 69637542,
-        "nick_name": "情战"
-    },
-    "gh_e9d819f9e147": {
-        "uid": 69637525,
-        "nick_name": "与卿"
-    },
-    "gh_efaf7da157f5": {
-        "uid": 69637547,
-        "nick_name": "心野性子浪"
-    },
-    "gh_f4594783f5b8": {
-        "uid": 69637544,
-        "nick_name": "自缚"
-    },
-    "gh_fe6ef3a65a48": {
-        "uid": 69637480,
-        "nick_name": "风间"
-    }
-}
+from applications.functions.log import logging
+from applications.functions.common import MySQLServer
 
 
 async def recall_videos(trace_id, s_videos):
@@ -322,7 +20,7 @@ async def recall_videos(trace_id, s_videos):
 
     # 在两边召回视频
     # pq_videos
-    recall_video_list = select_pq_videos()
+    recall_video_list = MySQLServer().select_pq_videos()
     # dirs_1 = os.path.join(os.getcwd(), 'applications', 'static', 'out_videos')
     # file_list = [os.path.join(dirs_1, "{}.json".format(vid)) for vid in s_videos]
     # search_list = []

+ 5 - 6
applications/routes.py

@@ -7,10 +7,9 @@ import uuid
 import asyncio
 from quart import Blueprint, jsonify, request
 
-from applications.log import logging
-from applications.process import ProcessParams
-from applications.search import search_videos
-from applications.functions.common import find_videos_in_mysql, ask_kimi_and_save_to_local
+from applications.functions.log import logging
+from applications.schedule import ProcessParams, search_videos
+from applications.functions.common import KimiServer, MySQLServer
 
 my_blueprint = Blueprint('kimi', __name__)
 
@@ -57,7 +56,7 @@ async def search_videos_from_the_web():
                 trace_id=trace_id
             )
         else:
-            ask_kimi_and_save_to_local((title, trace_id, title_p))
+            KimiServer().ask_kimi_and_save_to_local((title, trace_id, title_p))
         await asyncio.sleep(2)
         search_videos(
             title=title,
@@ -93,7 +92,7 @@ async def find_in_mysql():
         trace_id=trace_id,
         function="find_in_mysql"
     )
-    res = find_videos_in_mysql(trace_id=trace_id)
+    res = MySQLServer().select_download_videos(trace_id=trace_id)
     return jsonify(res)
 
 

+ 5 - 0
applications/schedule/__init__.py

@@ -0,0 +1,5 @@
+"""
+@author: luojunhui
+"""
+from .process_schedule import ProcessParams
+from .search_schedule import search_videos

+ 0 - 0
applications/process.py → applications/schedule/process_schedule.py


+ 7 - 7
applications/search.py → applications/schedule/search_schedule.py

@@ -6,11 +6,11 @@ import json
 import time
 import requests
 
-from applications.mq import MQ
-from applications.log import logging
-from applications.config import gh_id_dict
+from applications.functions.mq import MQ
+from applications.functions.log import logging
+from applications.static.config import gh_id_dict
 from applications.functions.item import VideoItem
-from applications.functions.common import sensitive_flag
+from applications.functions.common import Functions
 
 
 def wx_search(keys):
@@ -93,7 +93,7 @@ def return_video(video_path, title, trace_id):
                     title = obj['items'][0]['title'].replace('<em class=\"highlight\">', '').replace('</em>',
                                                                                                  '').replace("#",
                                                                                                         "")
-                    if sensitive_flag(title):
+                    if Functions().sensitive_flag(title):
                         return obj
                     else:
                         continue
@@ -116,7 +116,7 @@ def return_video(video_path, title, trace_id):
                     title = obj['items'][0]['title'].replace('<em class=\"highlight\">', '').replace('</em>',
                                                                                                      '').replace("#",
                                                                                                                  "")
-                    if sensitive_flag(title):
+                    if Functions().sensitive_flag(title):
                         return obj
                     else:
                         continue
@@ -139,7 +139,7 @@ def return_video(video_path, title, trace_id):
                     title = obj['items'][0]['title'].replace('<em class=\"highlight\">', '').replace('</em>',
                                                                                                      '').replace("#",
                                                                                                                  "")
-                    if sensitive_flag(title):
+                    if Functions().sensitive_flag(title):
                         return obj
                     else:
                         continue

+ 6 - 0
applications/search/__init__.py

@@ -0,0 +1,6 @@
+"""
+@author: luojunhui
+"""
+from .hksp_search import hksp_search
+from .weixin_search import wx_search
+from .xigua_search import xigua_search

+ 94 - 0
applications/search/hksp_search.py

@@ -0,0 +1,94 @@
+"""
+@author: luojunhui
+好看视频搜索爬虫
+"""
+import requests
+import urllib.parse
+import time
+import hashlib
+
+
+def get_video_detail(video_id, timeout=10):
+    """
+    Fetch the metadata of a single Haokan video.
+
+    Sends a GET request to ``https://haokan.baidu.com/v`` asking for JSON and
+    returns the ``data.apiData.curVideoMeta`` object of the reply.
+
+    :param video_id: Haokan video id, sent as the ``vid`` query parameter
+    :param timeout: seconds to wait for the server; without a timeout a
+        stalled connection would block the caller indefinitely
+    :return: the ``curVideoMeta`` object from the JSON reply
+    :raises KeyError: when the reply lacks the ``data.apiData.curVideoMeta`` path
+    """
+    url = "https://haokan.baidu.com/v"
+    params = {
+        'vid': video_id,
+        '_format': 'json',
+        # Further query parameters that are currently NOT sent:
+        # 'hk_nonce': 'f47386e95fe657182aa3c1826d9a6b85',
+        # 'hk_timestamp': '1715225386',
+        # 'hk_sign': '4b219f5e3971e42b3e23dc2a209fc9d9',
+        # 'hk_token': 'Dg8DdAVwdwNzDHcFcXF+D3gHBQA'
+    }
+
+    headers = {
+        'Accept': '*/*',
+        'cookie': "BIDUPSID='",
+        'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
+        'Cache-Control': 'no-cache',
+        'Connection': 'keep-alive',
+        'Content-Type': 'application/x-www-form-urlencoded',
+        'Referer': 'https://haokan.baidu.com',
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
+    }
+    response = requests.get(url, headers=headers, params=params, timeout=timeout).json()
+    return response['data']['apiData']['curVideoMeta']
+
+
+def hksp_search(key):
+    """
+    Search Haokan video for a keyword and resolve the first hits to play URLs.
+
+    Only the first five search results are looked up. A hit whose detail
+    lookup fails is skipped, so the result may hold fewer than five entries.
+
+    :param key: search keyword (sent as ``query``; URL-quoted inside the sign)
+    :return: list of ``["haokanshipin", title, play_url, page_url]`` lists
+    :raises KeyError: when the search reply lacks the ``data.list`` path
+    """
+    page_number, page_size, version = 1, 10, 1
+    timestamp_milliseconds = int(time.time() * 1000)
+    url = 'https://haokan.baidu.com/haokan/ui-search/pc/search/video'
+    # Request signature: MD5 of "pn_quotedQuery_rn_timestamp_version".
+    # NOTE(review): presumably mirrors the site's own signing scheme - confirm.
+    strings = "{}_{}_{}_{}_{}".format(
+        page_number, urllib.parse.quote(key), page_size, timestamp_milliseconds, version
+    )
+    sign = hashlib.md5(strings.encode()).hexdigest()
+    params = {
+        'pn': page_number,
+        'rn': page_size,
+        'type': 'video',
+        'query': key,
+        'sign': sign,
+        'version': version,
+        'timestamp': timestamp_milliseconds
+    }
+    headers = {
+        'authority': 'haokan.baidu.com',
+        'accept': '*/*',
+        'accept-language': 'zh,en;q=0.9,zh-CN;q=0.8',
+        'cookie': "BIDUPSID='",
+        # 'referer': 'https://haokan.baidu.com/web/search/page?query=%E8%80%81%E4%BA%BA',
+        'sec-ch-ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
+        'sec-ch-ua-mobile': '?0',
+        'sec-ch-ua-platform': '"macOS"',
+        'sec-fetch-dest': 'empty',
+        'sec-fetch-mode': 'cors',
+        'sec-fetch-site': 'same-origin',
+        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
+        'x-requested-with': 'xmlhttprequest',
+    }
+    # A timeout keeps a stalled connection from blocking the caller forever.
+    response = requests.get(url, headers=headers, params=params, timeout=10).json()
+    data_list = response['data']['list']
+    results = []
+    for data in data_list[:5]:
+        try:
+            video_id = data['vid']
+            res = get_video_detail(video_id)
+            results.append(
+                ["haokanshipin", res['title'], res['playurl'], "https://haokan.baidu.com/v?vid={}".format(video_id)]
+            )
+        except Exception:
+            # Best effort: drop this hit and keep going. Unlike a bare
+            # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
+            continue
+    return results
+
+
+# Manual smoke run: performs one live search; the returned list is discarded.
+if __name__ == '__main__':
+    hksp_search("美国竟对中国提出4个荒唐的条件,真是好大的口气")

+ 24 - 0
applications/search/weixin_search.py

@@ -0,0 +1,24 @@
+"""
+@author: luojunhui
+"""
+import json
+import requests
+
+
+def wx_search(keys):
+    """
+    Query the WeChat keyword crawler service for videos.
+
+    :param keys: search keyword, sent as the ``keyword`` field of the JSON body
+    :return: the decoded JSON body of the service reply
+    """
+    endpoint = "http://8.217.190.241:8888/crawler/wei_xin/keyword"
+    request_body = {
+        "keyword": keys,
+        "cursor": "0",
+        "content_type": "video"
+    }
+    reply = requests.post(
+        endpoint,
+        headers={'Content-Type': 'application/json'},
+        data=json.dumps(request_body),
+    )
+    return reply.json()

+ 238 - 0
applications/search/xigua_search.py

@@ -0,0 +1,238 @@
+"""
+@author: luojunhui
+西瓜视频搜索爬虫
+"""
+import re
+import json
+import time
+import random
+import base64
+import urllib.parse
+
+import requests
+from lxml import etree
+from Crypto.Cipher import AES
+from Crypto.Util.Padding import unpad
+from fake_useragent import FakeUserAgent
+
+
+def byte_dance_cookie(item_id):
+    """
+    Obtain a cookie value for Xigua video requests.
+
+    Opens a session, requests the Slardar SDK script, then posts a
+    registration payload to the ttwid endpoint.
+
+    :param item_id: id inserted into the ``referer`` header of the session
+    :return: the first value in the cookie jar of the registration reply
+    """
+    register_payload = '{"region":"cn","aid":1768,"needFid":false,"service":"www.ixigua.com","migrate_info":{"ticket":"","source":"node"},"cbUrlProtocol":"https","union":true}'
+    session_headers = {
+        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
+        'referer': 'https://www.ixigua.com/home/{}/'.format(item_id),
+    }
+    session = requests.Session()
+    session.headers.update(session_headers)
+
+    # First request on the session; its reply is not inspected.
+    session.get('https://i.snssdk.com/slardar/sdk.js?bid=xigua_video_web_pc')
+    register_reply = session.post('https://ttwid.bytedance.com/ttwid/union/register/', data=register_payload)
+    cookie_values = register_reply.cookies.values()
+    return cookie_values[0]
+
+
+def aes_decrypt(data: str, key: str) -> str:
+    """
+    Decrypt a Xigua payload with AES in CBC mode.
+
+    The key bytes serve as the AES key and their first 16 bytes as the IV.
+    ``data`` is base64-decoded, decrypted and unpadded; the plaintext is then
+    base64-decoded once more and decoded to text.
+
+    :param data: base64 text of the ciphertext
+    :param key: key text
+    :return: the decrypted text, or ``None`` when any step fails (the error is printed)
+    """
+    key_bytes = key.encode()
+    init_vector = key_bytes[:16]
+    try:
+        cipher_text = base64.b64decode(data.encode())
+        decryptor = AES.new(key_bytes, AES.MODE_CBC, init_vector)
+        padded_plain = decryptor.decrypt(cipher_text)
+        encoded_plain = unpad(padded_plain, AES.block_size)
+        plain_text = base64.b64decode(encoded_plain).decode()
+    except Exception as e:
+        print("Incorrect decryption {}".format(e))
+        return None
+    return plain_text
+
+
+def extract_video_url(text):
+    """
+    Extract and decrypt the video URL embedded in a Xigua page.
+
+    Reads the ``SSR_HYDRATED_DATA`` script, rewrites a few literals so the
+    payload parses as JSON, and decrypts the ``main_url`` of the chosen stream.
+
+    :param text: HTML text of the video page
+    :return: result of ``aes_decrypt`` for the selected ``main_url``
+    """
+    page = etree.HTML(text)
+    hydrated = page.xpath('//script[@id="SSR_HYDRATED_DATA"]/text()')[0]
+    start = hydrated.find('{')
+    end = hydrated.rfind('}') + 1
+    payload = hydrated[start:end]
+    # Applied strictly in this order: the '=false' / '=true' forms are
+    # capitalised first so the plain 'false' / 'true' passes skip them.
+    substitutions = (
+        ('null', '12'),
+        ('undefined', '12'),
+        ('=false', '=False'),
+        ('=true', '=True'),
+        ('false', '12'),
+        ('true', '12'),
+    )
+    for old, new in substitutions:
+        payload = payload.replace(old, new)
+    resource = json.loads(payload)["anyVideo"]["gidInformation"]["packerData"]["video"]["videoResource"]
+    # 12 is the placeholder written above, i.e. 'dash' was null/undefined/false/true.
+    if resource['dash'] == 12:
+        stream = resource['normal']
+        ptk = stream['ptk']
+        encrypted_url = stream['video_list']['video_3']['main_url']
+    else:
+        stream = resource['dash']
+        ptk = stream["ptk"]
+        encrypted_url = stream['dynamic_video']['main_url']
+    return aes_decrypt(data=encrypted_url, key=ptk)
+
+
+def extract_info_by_re(text):
+    """
+    Pull video metadata out of a Xigua page with regular expressions.
+
+    The title falls back to an empty string when no ``<title>`` tag is found.
+    Every other field is required: a missing one raises ``AttributeError``.
+
+    :param text: HTML text of the video page
+    :return: dict with keys ``title``, ``url``, ``video_id``, ``like_count``,
+        ``cover_url``, ``play_count``, ``publish_time`` and ``duration``
+    """
+    def first_group(pattern):
+        # Captured group 1 of the first match; raises AttributeError on no match.
+        return re.search(pattern, text).group(1)
+
+    # Title: keep the part before " - " and re-decode it from latin1 bytes.
+    title_content = ""
+    title_match = re.search(r'<title[^>]*>(.*?)</title>', text)
+    if title_match:
+        raw_title = title_match.group(1).split(" - ")[0]
+        title_content = bytes(raw_title, "latin1").decode()
+
+    video_id = first_group(r'"vid":"(.*?)"')
+    like_count = first_group(r'"video_like_count":(.*?),')
+    cover_url = first_group(r'"avatar_url":"(.*?)"')
+    video_watch_count = first_group(r'"video_watch_count":(.*?),')
+    publish_time = first_group(r'"video_publish_time":"(.*?)"')
+    # The capture runs up to the next quote, so commas are stripped afterwards.
+    duration_match = re.search(r'("video_duration":)(.*?)"', text)
+    duration = duration_match.group(2).replace(",", "")
+
+    # Resolved last, after every regex lookup has succeeded.
+    video_url = extract_video_url(text)
+    return {
+        "title": title_content,
+        "url": video_url,
+        "video_id": video_id,
+        "like_count": like_count,
+        "cover_url": cover_url,
+        "play_count": video_watch_count,
+        "publish_time": publish_time,
+        "duration": duration
+    }
+
+
+def get_video_info(item_id):
+    """
+    Download a Xigua video page and turn it into a flat metadata dict.
+
+    The cookie comes from ``byte_dance_cookie`` (defined once, earlier in this
+    module; a byte-identical second definition that used to sit here was
+    removed). Page parsing is delegated to ``extract_info_by_re``.
+
+    :param item_id: Xigua item id; used in the page URL, the referer and ``gid``
+    :return: dict with the video's title, ids, counters, timestamps and URLs
+    :raises ValueError: when a numeric field of the page cannot be parsed as int
+    """
+    url = "https://www.ixigua.com/{}".format(item_id)
+    headers = {
+        "accept-encoding": "gzip, deflate",
+        "accept-language": "zh-CN,zh-Hans;q=0.9",
+        "cookie": "ttwid={}".format(byte_dance_cookie(item_id)),
+        "user-agent": FakeUserAgent().random,
+        "referer": "https://www.ixigua.com/{}/".format(item_id),
+    }
+    response = requests.get(
+        url=url,
+        headers=headers,
+        # proxies=tunnel_proxies(),
+        timeout=5,
+    )
+    # Random 1-5 second pause after each page fetch.
+    # NOTE(review): presumably request throttling - confirm.
+    time.sleep(random.randint(1, 5))
+    video_info = extract_info_by_re(response.text)
+
+    # Parsed once; feeds both the numeric and the formatted timestamp below.
+    publish_time_stamp = int(video_info.get("publish_time", 0))
+    video_dict = {
+        "video_title": video_info.get("title", ""),
+        "video_id": video_info.get("video_id"),
+        "gid": str(item_id),
+        "play_cnt": int(video_info.get("play_count", 0)),
+        "like_cnt": int(video_info.get("like_count", 0)),
+        "comment_cnt": 0,
+        "share_cnt": 0,
+        "favorite_cnt": 0,
+        "duration": int(video_info.get("duration", 0)),
+        "video_width": 0,
+        "video_height": 0,
+        "publish_time_stamp": publish_time_stamp,
+        "publish_time_str": time.strftime(
+            "%Y-%m-%d %H:%M:%S",
+            time.localtime(publish_time_stamp),
+        ),
+        # extract_info_by_re returns no "user_info" key, so this is "" today.
+        "avatar_url": str(
+            video_info.get("user_info", {}).get("avatar_url", "")
+        ),
+        "cover_url": video_info.get("cover_url", "").replace("\\u002F", "/"),
+        "video_url": video_info.get("url"),
+        "session": f"xigua-author-{int(time.time())}",
+    }
+    return video_dict
+
+
+def xigua_search(keyword):
+    """
+    Search Xigua video for a keyword and resolve the first hits to video URLs.
+
+    Only the first five result links are looked up. A hit whose detail lookup
+    fails is skipped, so the result may hold fewer than five entries.
+
+    :param keyword: search keyword; URL-quoted before it goes into the path
+    :return: list of ``["xigua", title, video_url, page_url]`` lists
+    """
+    keyword = urllib.parse.quote(keyword)
+    base_url = "https://www.ixigua.com/search/{}/ab_name=search&fss=input".format(
+        keyword
+    )
+    headers = {
+        "authority": "www.ixigua.com",
+        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+        "accept-language": "zh,en;q=0.9,zh-CN;q=0.8",
+        "cache-control": "max-age=0",
+        "cookie": "ixigua-a-s=1; support_webp=true; support_avif=true; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; tt_scid=Ur23fgYD2pMJOvi1BpILyfaobg8wA7IhGwmQx260ULRa8Dvjaxc5ZA63BUIP-6Vi473f; ttwid=1%7CNtTtSp4Iej-v0nWtepdZH3d3Ts6uGNMFzTN20ps1cdo%7C1708236945%7Cc1f301c64aa3bf69cdaa41f28856e2bb7b7eed16583f8c92d50cffa2d9944fc6; msToken=rr418opQf04vm8n9s8FAGdr1AoCUsvAOGKSDPbBEfwVS1sznxxZCvcZTI93qVz5uAXlX9yRwcKlNQZ4wMro2DmlHw5yWHAVeKr_SzgO1KtVVnjUMTUNEux_cq1-EIkI=",
+        "sec-ch-ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
+        "sec-ch-ua-mobile": "?0",
+        "sec-ch-ua-platform": '"macOS"',
+        "sec-fetch-dest": "document",
+        "sec-fetch-mode": "navigate",
+        "sec-fetch-site": "none",
+        "sec-fetch-user": "?1",
+        "upgrade-insecure-requests": "1",
+        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
+    }
+    # Same 5 s timeout as get_video_info, so a stalled search cannot hang the caller.
+    basic_response = requests.get(url=base_url, headers=headers, timeout=5)
+    html = etree.HTML(basic_response.text)
+    result = html.xpath(
+        '//a[@class="HorizontalFeedCard__coverWrapper disableZoomAnimation"]/@href'
+    )
+    res_list = []
+    for page_id in result[:5]:
+        # href has the form "/<doc_id>?<query>": drop the first character and the query.
+        doc_id = page_id[1:].split("?")[0]
+        try:
+            res = get_video_info(doc_id)
+            temp = ["xigua", res['video_title'], res['video_url'], "https://www.ixigua.com/{}".format(doc_id)]
+            res_list.append(temp)
+        except Exception:
+            # Best effort: drop this hit and keep going. Unlike a bare
+            # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
+            continue
+    return res_list
+

+ 0 - 0
applications/config.py → applications/static/config.py


+ 2 - 0
requirements.txt

@@ -102,3 +102,5 @@ wsproto==1.2.0
 WTForms==3.1.2
 yarl==1.9.4
 zipp==3.16.2
+
+lxml~=5.2.1

+ 23 - 38
test.py

@@ -1,44 +1,29 @@
-"""
-@author: luojunhui
-"""
+# encoding: utf-8
 import json
 import time
-import requests
-import argparse
-from concurrent.futures import ThreadPoolExecutor
 
+import requests
 
-def request_data(url):
-    # index = _url.split("#")[0]
-    # url = _url.split("#")[1]
-    body = {
-        "accountName": "魔法美学馆",
-        "content": "8月20日,最高人民法院举行新闻发布会,发布新修订的《最高人民法院关于审理民间借贷案件适用法律若干问题的规定》(以下简称《规定》)并回答记者提问。",
-        "title": "🔴日本收到俄罗斯令人惊慌的消息😱",
-        "search_keys": ["日本核污水排海"],
-        "ghId": "gh_efaf7da157f5"
-    }
-    t = time.time()
-    res = requests.post(url, json=body)
-    e = time.time()
-    # print(index)
-    print(e - t)
-    print(json.dumps(res.json(), ensure_ascii=False, indent=4))
-    # print(res.json())
-
+url = "http://61.48.133.26:8111/title_to_video"
+body = {
+    "accountName": "票圈极速版",
+    "content": "",
+    "cover": "",
+    "ghId": "gh_d2cc901deca7",
+    "title": "江泽民"
+}
+a = time.time()
+header = {
+    "Content-Type": "application/json",
+}
 
-if __name__ == "__main__":
-    # parser = argparse.ArgumentParser()  # 新建参数解释器对象
-    # parser.add_argument("--thread")
-    # args = parser.parse_args()
-    # thread = int(args.thread)
-    dt = ["http://61.48.133.26:8111/title_to_video"]
-    # total_s = time.time()
-    request_data(dt[0])
-    # with ThreadPoolExecutor(max_workers=thread) as pool:
-    #     pool.map(request_data, dt)
-    # total_e = time.time()
-    # print(total_e - total_s)
-import uuid
-import urllib.parse
+response = requests.post(url, json=body, headers=header, timeout=60)
+b = time.time()
+print(b - a)
+print(json.dumps(response.json(), ensure_ascii=False, indent=4))
 
+"""
+curl --location 'http://61.48.133.26:8111/title_to_video' \
+--header 'Content-Type: application/json' \
+--data '{"accountName":"魔法美学馆","ghId":"gh_d2cc901deca7","content":"\n8月20日,最高人民法院举行新闻发布会,发布新修订的《最高人民法院关于审理民间借贷案件适用法律若干问题的规定》(以下简称《规定》)并回答记者提问。","title":"最高法发布新修订的《关于审理民间借贷案件适用法律若干问题的规定》(附全文)"}'
+"""