Bladeren bron

2024-0514-搜索策略实验上线

罗俊辉 1 jaar geleden
bovenliggende
commit
1db0bd31ec

+ 1 - 115
applications/functions/common.py

@@ -1,14 +1,13 @@
+# encoding: utf-8
 """
 @author: luojunhui
 """
-import os
 import json
 import time
 import uuid
 import requests
 import pymysql
 import urllib.parse
-from openai import OpenAI
 
 from applications.functions.log import logging
 
@@ -257,116 +256,3 @@ class MySQLServer(object):
         else:
             return out_video_list[0][0]
 
-
-class KimiServer(object):
-    """
-    Kimi Server
-    """
-
-    @classmethod
-    def ask_kimi(cls, question):
-        """
-        Ask Kimi for information
-        :param question: tiny text
-        :return: "{}"
-        """
-        single_title_prompt = """
-            我会给你一个视频标题,需要你帮我用你所学的知识来帮我分析出以下信息,信息我都写到 json 里面了
-            {
-                "key_words": [],  # 返回三个关键词
-                "search_keys": [], # 标题可能的搜索关键词,返回 3 个
-                "extra_keys": [], # 关心这个视频的用户还会关心哪些关键词, 返回 3 个
-                "theme": 标题的主题, 用一个词概括
-            }
-            只需要返回一个 json,key 和上面的一样,
-            我给你的标题是: 
-            """
-        client = OpenAI(
-            api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
-            base_url="https://api.moonshot.cn/v1"
-        )
-        chat_completion = client.chat.completions.create(
-            messages=[
-                {
-                    "role": "user",
-                    "content": single_title_prompt + question,
-                }
-            ],
-            model="moonshot-v1-8k",
-        )
-        response = chat_completion.choices[0].message.content.replace('```json', '').replace('```', '')
-        try:
-            response = json.loads(response)
-            return response
-        except:
-            return {}
-
-    @classmethod
-    def ask_kimi_and_save_to_local(cls, info_tuple):
-        """
-        save file to local
-        :return:
-        """
-        title, trace_id, save_path = info_tuple[0], info_tuple[1], info_tuple[2]
-        if os.path.exists(save_path):
-            logging(
-                code="2001",
-                info="该 video 信息已经挖掘完成---{}".format(title),
-                function="ask_kimi_and_save_to_local",
-                trace_id=trace_id,
-            )
-        else:
-            os.makedirs(os.path.dirname(save_path), exist_ok=True)
-            if not title:
-                result = {}
-            else:
-                result = cls.ask_kimi(title)
-            logging(
-                code="2001",
-                info="kimi-result",
-                data=result,
-                trace_id=trace_id,
-                function="ask_kimi_and_save_to_local"
-            )
-            with open(save_path, "w", encoding="utf-8") as f:
-                f.write(json.dumps(result, ensure_ascii=False))
-
-    @classmethod
-    def kimi_title(cls, ori_title):
-        """
-        prompt + kimi + ori_title generate new title
-        :param ori_title:
-        :return:
-        """
-        single_title_prompt = """
-        请将以上标题改写成适合小程序点击和传播的小程序标题,小程序标题的写作规范如下,请学习后进行小程序标题的编写。直接输出最终的小程序标题
-        小程序标题写作规范:
-        1.要点前置:将最重要的信息放在标题的最前面,以快速吸引读者的注意力。例如,“5月一辈子同学,三辈子亲,送给我的老同学,听哭无数人!”中的“5月”和“一辈子同学,三辈子亲”都是重要的信息点。
-        2.激发情绪:使用能够触动人心的语言,激发读者的情感共鸣。如“只剩两人同学聚会,看后感动落泪。”使用“感动落泪”激发读者的同情和怀旧情绪。
-        3.使用数字和特殊符号:数字可以提供具体性,而特殊符号如“🔴”、“😄”、“🔥”等可以吸引视觉注意力,增加点击率。
-        4.悬念和好奇心:创建悬念或提出问题,激发读者的好奇心。例如,“太神奇了!长江水位下降,重庆出现惊奇一幕!”中的“惊奇一幕”就是一个悬念。
-        5.名人效应:如果内容与知名人士相关,提及他们的名字可以增加标题的吸引力。
-        6.社会价值观:触及读者的文化和社会价值观,如家庭、友情、国家荣誉等。
-        7.标点符号的运用:使用感叹号、问号等标点来增强语气和情感表达。
-        8.直接的语言:使用直白、口语化的语言,易于理解,如“狗屁股,笑死我了!”。
-        9.热点人物或事件:提及当前的热点人物或事件,利用热点效应吸引读者。
-        10.字数适中:保持标题在10-20个字之间,既不过长也不过短,确保信息的完整性和吸引力。
-        11.适当的紧迫感:使用“最新”、“首次”、“紧急”等词汇,创造一种紧迫感,促使读者立即行动。
-        12.情感或价值诉求:使用如“感动”、“泪目”、“经典”等词汇,直接与读者的情感或价值观产生共鸣。
-        避免误导:确保标题准确反映内容,避免夸大或误导读者。
-        """
-        client = OpenAI(
-            api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
-            base_url="https://api.moonshot.cn/v1"
-        )
-        chat_completion = client.chat.completions.create(
-            messages=[
-                {
-                    "role": "user",
-                    "content": ori_title + "\n" + single_title_prompt,
-                }
-            ],
-            model="moonshot-v1-8k",
-        )
-        response = chat_completion.choices[0].message.content
-        return response

+ 190 - 0
applications/functions/kimi.py

@@ -0,0 +1,190 @@
+"""
+@author: luojunhui
+"""
+import os
+import json
+from openai import OpenAI
+
+from applications.functions.log import logging
+
+
+class KimiServer(object):
+    """
+    Kimi Server
+    """
+
+    @classmethod
+    async def search_kimi_schedule(cls, params):
+        """
+        搜索阶段 kimi 操作
+        :param params:
+        :return:
+        """
+        title = params['title'].split("@@")[-1]
+        contents = params['content']
+        trace_id = params['trace_id']
+        title_p = os.path.join(os.getcwd(), 'applications', 'static', "titles", "{}.json".format(title))
+        if os.path.exists(title_p):
+            logging(
+                code="2001",
+                info="该标题已经被 kimi 处理过,跳过请求 kimi 操作--- {}".format(title),
+                function="search_videos_from_the_web",
+                trace_id=trace_id
+            )
+        else:
+            await cls.ask_kimi_and_save_to_local((title, trace_id, title_p))
+        kimi_title = await cls.kimi_title(title)
+        kimi_info = await cls.kimi_mining(contents)
+        kimi_info['k_title'] = kimi_title
+        kimi_info['ori_title'] = title
+        logging(
+            code="8000",
+            info="kimi_mining",
+            data=kimi_info,
+            trace_id=trace_id
+        )
+        return kimi_info
+
+    @classmethod
+    async def ask_kimi(cls, question):
+        """
+        Ask Kimi for information
+        :param question: tiny text
+        :return: "{}"
+        """
+        single_title_prompt = """
+            我会给你一个视频标题,需要你帮我用你所学的知识来帮我分析出以下信息,信息我都写到 json 里面了
+            {
+                "key_words": [],  # 返回三个关键词
+                "search_keys": [], # 标题可能的搜索关键词,返回 3 个
+                "extra_keys": [], # 关心这个视频的用户还会关心哪些关键词, 返回 3 个
+                "theme": 标题的主题, 用一个词概括
+            }
+            只需要返回一个 json,key 和上面的一样,
+            我给你的标题是: 
+            """
+        client = OpenAI(
+            api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
+            base_url="https://api.moonshot.cn/v1"
+        )
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": single_title_prompt + question,
+                }
+            ],
+            model="moonshot-v1-8k",
+        )
+        response = chat_completion.choices[0].message.content.replace('```json', '').replace('```', '')
+        try:
+            response = json.loads(response)
+            return response
+        except:
+            return {}
+
+    @classmethod
+    async def ask_kimi_and_save_to_local(cls, info_tuple):
+        """
+        save file to local
+        :return:
+        """
+        title, trace_id, save_path = info_tuple[0], info_tuple[1], info_tuple[2]
+        if os.path.exists(save_path):
+            logging(
+                code="2001",
+                info="该 video 信息已经挖掘完成---{}".format(title),
+                function="ask_kimi_and_save_to_local",
+                trace_id=trace_id,
+            )
+        else:
+            os.makedirs(os.path.dirname(save_path), exist_ok=True)
+            if not title:
+                result = {}
+            else:
+                result = await cls.ask_kimi(title)
+            logging(
+                code="2001",
+                info="kimi-result",
+                data=result,
+                trace_id=trace_id,
+                function="ask_kimi_and_save_to_local"
+            )
+            with open(save_path, "w", encoding="utf-8") as f:
+                f.write(json.dumps(result, ensure_ascii=False))
+
+    @classmethod
+    async def kimi_title(cls, ori_title):
+        """
+        prompt + kimi + ori_title generate new title
+        :param ori_title:
+        :return:
+        """
+        single_title_prompt = """
+        请将以上标题改写成适合小程序点击和传播的小程序标题,小程序标题的写作规范如下,请学习后进行小程序标题的编写。直接输出最终的小程序标题
+        小程序标题写作规范:
+        1.要点前置:将最重要的信息放在标题的最前面,以快速吸引读者的注意力。例如,“5月一辈子同学,三辈子亲,送给我的老同学,听哭无数人!”中的“5月”和“一辈子同学,三辈子亲”都是重要的信息点。
+        2.激发情绪:使用能够触动人心的语言,激发读者的情感共鸣。如“只剩两人同学聚会,看后感动落泪。”使用“感动落泪”激发读者的同情和怀旧情绪。
+        3.使用数字和特殊符号:数字可以提供具体性,而特殊符号如“🔴”、“😄”、“🔥”等可以吸引视觉注意力,增加点击率。
+        4.悬念和好奇心:创建悬念或提出问题,激发读者的好奇心。例如,“太神奇了!长江水位下降,重庆出现惊奇一幕!”中的“惊奇一幕”就是一个悬念。
+        5.名人效应:如果内容与知名人士相关,提及他们的名字可以增加标题的吸引力。
+        6.社会价值观:触及读者的文化和社会价值观,如家庭、友情、国家荣誉等。
+        7.标点符号的运用:使用感叹号、问号等标点来增强语气和情感表达。
+        8.直接的语言:使用直白、口语化的语言,易于理解,如“狗屁股,笑死我了!”。
+        9.热点人物或事件:提及当前的热点人物或事件,利用热点效应吸引读者。
+        10.字数适中:保持标题在10-20个字之间,既不过长也不过短,确保信息的完整性和吸引力。
+        11.适当的紧迫感:使用“最新”、“首次”、“紧急”等词汇,创造一种紧迫感,促使读者立即行动。
+        12.情感或价值诉求:使用如“感动”、“泪目”、“经典”等词汇,直接与读者的情感或价值观产生共鸣。
+        避免误导:确保标题准确反映内容,避免夸大或误导读者。
+        """
+        client = OpenAI(
+            api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
+            base_url="https://api.moonshot.cn/v1"
+        )
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": ori_title + "\n" + single_title_prompt,
+                }
+            ],
+            model="moonshot-v1-8k",
+        )
+        response = chat_completion.choices[0].message.content
+        return response
+
+    @classmethod
+    async def kimi_mining(cls, text):
+        """
+        通过文章来挖掘出有效的信息
+        :param text:
+        :return:
+        """
+        text_prompt = """
+        请从我给你的文章中挖掘出以下信息并且返回如下结果。
+        你返回的结果是一个 json, 格式如下:
+        {
+            "content_keys": [] # 同时提供三个与文章内容高度相关的关键词,这些关键词将用于网络上搜索相关视频内容,
+            "content_title": 一个总结性的标题,该标题应简洁并能够反映文章的主要内容
+        }
+        你需要处理的文本是:
+        """
+        client = OpenAI(
+            api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
+            base_url="https://api.moonshot.cn/v1"
+        )
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": text_prompt + text,
+                }
+            ],
+            model="moonshot-v1-8k",
+        )
+        response = chat_completion.choices[0].message.content.replace('```json', '').replace('```', '')
+        try:
+            response = json.loads(response)
+            return response
+        except:
+            return {}

+ 1 - 1
applications/functions/video_item.py

@@ -202,7 +202,7 @@ class VideoProducer(object):
         return mq_obj
 
 
-def video_mq_sender(video_obj, user, trace_id, platform):
+async def video_mq_sender(video_obj, user, trace_id, platform):
     """
     异步处理微信 video_obj
     公众号和站内账号一一对应

+ 23 - 34
applications/routes.py

@@ -1,15 +1,15 @@
 """
 @author: luojunhui
 """
-import os
+import json
 import time
 import uuid
-import asyncio
 from quart import Blueprint, jsonify, request
 
 from applications.functions.log import logging
 from applications.schedule import ProcessParams, search_videos
-from applications.functions.common import KimiServer, MySQLServer
+from applications.functions.common import MySQLServer
+from applications.functions.kimi import KimiServer
 from applications.schedule.main_schedule import AskForInfo
 
 my_blueprint = Blueprint('kimi', __name__)
@@ -36,7 +36,7 @@ async def search_videos_from_the_web():
     :return:
     """
     params = await request.get_json()
-    title = params['title'].replace("【非头次】", "").replace("【头次】", "")
+    K = KimiServer()
     gh_id = params['ghId']
     trace_id = "search-{}-{}".format(str(uuid.uuid4()), str(int(time.time())))
     params['trace_id'] = trace_id
@@ -47,36 +47,25 @@ async def search_videos_from_the_web():
         function="search_videos_from_the_web",
         trace_id=trace_id
     )
-    try:
-        title_p = os.path.join(os.getcwd(), 'applications', 'static', "titles", "{}.json".format(title))
-        if os.path.exists(title_p):
-            logging(
-                code="2001",
-                info="该标题已经被 kimi 处理过,跳过请求 kimi 操作--- {}".format(title),
-                function="search_videos_from_the_web",
-                trace_id=trace_id
-            )
-        else:
-            KimiServer().ask_kimi_and_save_to_local((title, trace_id, title_p))
-        await asyncio.sleep(1)
-        kimi_title = KimiServer().kimi_title(title)
-        search_videos(
-            title=title,
-            video_path=title_p,
-            trace_id=trace_id,
-            gh_id=gh_id,
-        )
-        res = {
-            "trace_id": trace_id,
-            "code": 0,
-            "kimi_title": kimi_title
-        }
-    except Exception as e:
-        res = {
-            "trace_id": trace_id,
-            "code": 1,
-            "message": str(e)
-        }
+    # try:
+    kimi_info = await K.search_kimi_schedule(params=params)
+    await search_videos(
+        kimi_info=kimi_info,
+        trace_id=trace_id,
+        gh_id=gh_id
+    )
+    print(json.dumps(kimi_info, ensure_ascii=False, indent=4))
+    res = {
+        "trace_id": trace_id,
+        "code": 0,
+        "kimi_title": kimi_info['k_title']
+    }
+    # except Exception as e:
+    #     res = {
+    #         "trace_id": trace_id,
+    #         "code": 1,
+    #         "message": str(e)
+    #     }
     return jsonify(res)
 
 

+ 1 - 0
applications/schedule/main_schedule.py

@@ -25,6 +25,7 @@ class AskForInfo:
         url = "{}/title_to_search".format(self.base_url)
         body = {
             "title": self.params["title"],
+            "content": self.params['content'],
             "ghId": self.params["ghId"]
         }
         res = requests.post(url, json=body, timeout=120)

+ 161 - 58
applications/schedule/search_schedule.py

@@ -2,84 +2,191 @@
 @author: luojunhui
 调用接口在微信内搜索视频
 """
-import json
 
 from applications.search import *
-from applications.static.config import gh_id_dict
+from applications.static.config import gh_id_dict, ab_test_config
 from applications.functions.log import logging
 from applications.functions.video_item import video_mq_sender
 
 
-def recall_search_video(video_path, title, trace_id):
+class SearchABTest(object):
     """
-    search and send msg to ETL
-    :param trace_id:
-    :param title:  视频标题
-    :param video_path:  视频路径
-    :return:
+    搜索策略实验方案
     """
-    with open(video_path, encoding='utf-8') as f:
-        my_obj = json.loads(f.read())
-    if my_obj:
-        wx_result = wx_search(keys=title)
+
+    ori_title = None
+    article_summary = None
+    article_keys = None
+    gh_id = None
+    trace_id = None
+
+    def __init__(self, info, gh_id):
+        SearchABTest.set_class_properties(info, gh_id)
+
+    @classmethod
+    def set_class_properties(cls, info, gh_id):
+        """
+        初始化搜索策略实验类
+        :param info: kimi 挖掘的基本信息
+        :param gh_id: 公众号账号 id
+        :return:
+        """
+        cls.ori_title = info["ori_title"]
+        cls.article_summary = info["content_title"]
+        cls.article_keys = info["content_keys"]
+        cls.trace_id = info["trace_id"]
+        cls.gh_id = gh_id
+
+    @classmethod
+    def ab_0(cls):
+        """
+        默认搜索逻辑
+        :return:
+        """
+        wx_result = wx_search(keys=cls.ori_title)
         if wx_result:
-            return {
-                "platform": "wx_search",
-                "result": wx_result[0]
-            }
+            return {"platform": "wx_search", "result": wx_result[0]}
         else:
             logging(
                 code="7001",
-                info="通过微信搜索失败---{}".format(title),
-                trace_id=trace_id
+                info="通过微信搜索失败---{}".format(cls.ori_title),
+                trace_id=cls.trace_id,
             )
             # 微信搜不到的话,采用好看视频搜索
-            baidu_result = hksp_search(key=title)
+            baidu_result = hksp_search(key=cls.ori_title)
             if baidu_result:
-                return {
-                    "platform": "baidu_search",
-                    "result": baidu_result[0]
-                }
+                return {"platform": "baidu_search", "result": baidu_result[0]}
             else:
                 # Haokan search found nothing either: log the miss and return None (no Xigua fallback is performed here)
                 logging(
                     code="7001",
-                    info="通过baidu搜索失败---{}".format(title),
-                    trace_id=trace_id
+                    info="通过baidu搜索失败---{}".format(cls.ori_title),
+                    trace_id=cls.trace_id,
+                )
+                return None
+
+    @classmethod
+    def ab_1(cls):
+        """
+        :return:
+        """
+        wx_result = wx_search(keys=cls.article_summary)
+        if wx_result:
+            return {"platform": "wx_search", "result": wx_result[0]}
+        else:
+            logging(
+                code="7001",
+                info="通过微信搜索失败---{}".format(cls.article_summary),
+                trace_id=cls.trace_id,
+            )
+            # 微信搜不到的话,采用好看视频搜索
+            baidu_result = hksp_search(key=cls.article_summary)
+            if baidu_result:
+                return {"platform": "baidu_search", "result": baidu_result[0]}
+            else:
+                # Haokan search found nothing either: log the miss and return None (no Xigua fallback is performed here)
+                logging(
+                    code="7001",
+                    info="通过baidu搜索失败---{}".format(cls.article_summary),
+                    trace_id=cls.trace_id,
                 )
                 return None
-                # xigua_result = xigua_search(title)
-                # if xigua_result:
-                #     return {
-                #         "platform": "xg_search",
-                #         "result": xigua_result[0]
-                #     }
-                # else:
-                #     logging(
-                #         code="7001",
-                #         info="通过西瓜搜索失败---{}".format(title),
-                #         trace_id=trace_id
-                #     )
-                #     return None
-    else:
-        logging(
-            code="7000",
-            info="标题--{}--kimi 挖掘数据失败".format(title),
-            trace_id=trace_id
-        )
-        return None
 
+    @classmethod
+    def ab_2(cls):
+        """
+        article_keys + wx, falling back to baidu
+        :return:
+        """
+        wx_result = wx_search(keys=",".join(cls.article_keys))
+        if wx_result:
+            return {"platform": "wx_search", "result": wx_result[0]}
+        else:
+            logging(
+                code="7001",
+                info="通过微信搜索失败---{}".format(",".join(cls.article_keys)),
+                trace_id=cls.trace_id,
+            )
+            # 微信搜不到的话,采用好看视频搜索
+            baidu_result = hksp_search(key=",".join(cls.article_keys))
+            if baidu_result:
+                return {"platform": "baidu_search", "result": baidu_result[0]}
+            else:
+                # Haokan search found nothing either: log the miss and return None (no Xigua fallback is performed here)
+                logging(
+                    code="7001",
+                    info="通过baidu搜索失败---{}".format(",".join(cls.article_keys)),
+                    trace_id=cls.trace_id,
+                )
+                return None
+
+    @classmethod
+    def ab_3(cls):
+        """
+        article_summary + baidu
+        :return:
+        """
+        result = hksp_search(key=cls.article_summary)
+        return {"platform": "baidu_search", "result": result[0] if result else []}
+
+    @classmethod
+    def ab_4(cls):
+        """
+        article_summary + weixin
+        :return:
+        """
+        result = wx_search(keys=cls.article_summary)
+        return {"platform": "wx_search", "result": result[0] if result else []}
 
-def search_videos(video_path, title, trace_id, gh_id):
+    @classmethod
+    def ab_5(cls):
+        """
+        article_keys + weixin
+        :return:
+        """
+        result = wx_search(keys=",".join(cls.article_keys))
+        return {"platform": "wx_search", "result": result[0] if result else []}
+
+    @classmethod
+    def ab_6(cls):
+        """
+        article_keys + baidu
+        :return:
+        """
+        result = hksp_search(key=",".join(cls.article_keys))
+        return {"platform": "baidu_search", "result": result[0] if result else []}
+
+
+async def search_videos(kimi_info, trace_id, gh_id):
     """
     search and send msg to ETL
-    :param gh_id:
-    :param video_path:
-    :param title:
+    :param kimi_info:
+    :param gh_id: 通过账号 id 来控制实验策略
     :param trace_id:
     :return:
     """
-    recall_obj = recall_search_video(video_path, title, trace_id)
+    kimi_info["trace_id"] = trace_id
+    SearchAB = SearchABTest(info=kimi_info, gh_id=gh_id)
+    if ab_test_config.get(gh_id):
+        test_id = ab_test_config[gh_id]
+        if test_id == 0:
+            recall_obj = SearchAB.ab_0()
+        elif test_id == 1:
+            recall_obj = SearchAB.ab_1()
+        elif test_id == 2:
+            recall_obj = SearchAB.ab_2()
+        # elif test_id == 3:
+        #     recall_obj = SearchAB.ab_3()
+        # elif test_id == 4:
+        #     recall_obj = SearchAB.ab_4()
+        # elif test_id == 5:
+        #     recall_obj = SearchAB.ab_5()
+        # elif test_id == 6:
+        #     recall_obj = SearchAB.ab_6()
+        else:
+            recall_obj = {}
+    else:
+        recall_obj = SearchAB.ab_0()
     if recall_obj:
         platform = recall_obj["platform"]
         recall_video = recall_obj["result"]
@@ -88,17 +195,13 @@ def search_videos(video_path, title, trace_id, gh_id):
                 code="7002",
                 info="视频搜索成功, 搜索平台为--{}".format(platform),
                 trace_id=trace_id,
-                data=recall_video
+                data=recall_video,
             )
-            video_mq_sender(
+            await video_mq_sender(
                 video_obj=recall_video,
                 user=gh_id_dict.get(gh_id),
                 trace_id=trace_id,
-                platform=platform
+                platform=platform,
             )
     else:
-        logging(
-            code="7003",
-            info="视频搜索失败",
-            trace_id=trace_id
-        )
+        logging(code="7003", info="视频搜索失败", trace_id=trace_id)

+ 15 - 12
applications/search/hksp_search.py

@@ -2,10 +2,13 @@
 @author: luojunhui
 好看视频搜索爬虫
 """
-import requests
-import urllib.parse
+import json
 import time
+import base64
 import hashlib
+import requests
+import urllib.parse
+from uuid import uuid4
 from fake_useragent import FakeUserAgent
 
 from applications.functions.common import MySQLServer
@@ -20,16 +23,13 @@ def get_video_detail(video_id):
     url = "https://haokan.baidu.com/v"
     params = {
         'vid': video_id,
-        '_format': 'json',
-        # 'hk_nonce': 'f47386e95fe657182aa3c1826d9a6b85',
-        # 'hk_timestamp': '1715225386',
-        # 'hk_sign': '4b219f5e3971e42b3e23dc2a209fc9d9',
-        # 'hk_token': 'Dg8DdAVwdwNzDHcFcXF+D3gHBQA'
+        '_format': 'json'
     }
 
+    base_64_string = base64.b64encode(str(uuid4()).encode()).decode()
     headers = {
         'Accept': '*/*',
-        'cookie': "BIDUPSID='",
+        'cookie': "BIDUPSID={}".format(base_64_string),
         'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
         'Cache-Control': 'no-cache',
         'Connection': 'keep-alive',
@@ -79,12 +79,13 @@ def hksp_search(key):
         'timestamp': timestamp_milliseconds
     }
     # 定义请求头
+    base_64_string = base64.b64encode(str(uuid4()).encode()).decode()
     headers = {
         'authority': 'haokan.baidu.com',
         'accept': '*/*',
         'accept-language': 'zh,en;q=0.9,zh-CN;q=0.8',
-        'cookie': "BIDUPSID=",
-        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
+        'cookie': "BIDUPSID={}".format(base_64_string),
+        'user-agent': FakeUserAgent().chrome,
         'x-requested-with': 'xmlhttprequest',
     }
     # 发送GET请求
@@ -95,8 +96,10 @@ def hksp_search(key):
         for data in data_list:
             try:
                 video_id = data['vid']
-                res = get_video_detail(video_id)
-                if sensitive_flag(sensitive_words, ['title']) and int(res['duration']) <= 300:
+                title = data['title']
+                duration = int(data['duration'].split(":")[0]) * 60 + int(data['duration'].split(":")[1])
+                if sensitive_flag(sensitive_words, title) and int(duration) <= 300:
+                    res = get_video_detail(video_id)
                     L.append(res)
                 else:
                     continue

+ 39 - 1
applications/static/config.py

@@ -324,4 +324,42 @@ sensitive_words = [
     "省委书记",
     "国防部长",
     "外交部长"
-]
+]
+
+# 实验配置文件
+buy_accounts = [
+    "gh_084a485e859a",
+    "gh_e24da99dc899",
+    "gh_e0eb490115f5",
+    "gh_183d80deffb8",
+    "gh_5ff48e9fb9ef",
+    "gh_9f8dc5b0c74e",
+    "gh_6d9f36e3a7be"
+]
+
+dyy = [
+    "gh_9877c8541764",
+    "gh_6d205db62f04",
+    "gh_c69776baf2cd",
+    "gh_7e5818b2dd83",
+    "gh_89ef4798d3ea",
+    "gh_a2901d34f75b",
+    "gh_b15de7c99912"
+]
+
+ab_test_config = {
+    "gh_084a485e859a": 0,
+    "gh_e24da99dc899": 1,
+    "gh_e0eb490115f5": 2,
+    "gh_183d80deffb8": 0,
+    "gh_5ff48e9fb9ef": 1,
+    "gh_9f8dc5b0c74e": 2,
+    "gh_6d9f36e3a7be": 0,
+    "gh_9877c8541764": 1,
+    "gh_6d205db62f04": 2,
+    "gh_c69776baf2cd": 0,
+    "gh_7e5818b2dd83": 1,
+    "gh_89ef4798d3ea": 2,
+    "gh_a2901d34f75b": 1,
+    "gh_b15de7c99912": 2
+}

+ 74 - 0
dev/notes

@@ -0,0 +1,74 @@
+gh_2b8c6aa035ae	魔法美学馆
+gh_9e559b3b94ca	票圈大事件
+gh_084a485e859a	生活情感叁读
+gh_1ee2e1b39ccf	票圈最新消息
+gh_4c058673c07e	探马再探再报
+gh_de9f9ebc976b	赵师傅厨房秘笈
+gh_058e41145a0c	小琪故事馆
+gh_7b4a5f86d68c	八卦不断线
+gh_538f78f9d3aa	张阿姨爱美食
+gh_fe6ef3a65a48	心灵智慧馆
+gh_484de412b0ef	充电宝宝
+gh_4568b5a7e2fe	王小八娱乐
+gh_adca24a8f429	兔子爱蹬鹰
+gh_e24da99dc899	缘来养心厅
+gh_e0eb490115f5	心灵情感驿站
+gh_d2cc901deca7	票圈极速版
+gh_45beb952dc74	票圈乐活
+gh_b8baac4296cb	票圈原创视频精选
+gh_26a307578776	票圈美文速递
+gh_183d80deffb8	生活良读
+gh_9cf3b7ff486b	票圈热门
+gh_b32125c73861	票圈奇闻
+gh_5ff48e9fb9ef	祝福养心厅
+gh_9161517e5676	宝娃趣味游戏
+gh_9f8dc5b0c74e	音药金曲厅
+gh_3ac6d7208961	异次元玩家
+gh_6d9f36e3a7be	音药养心馆
+gh_ac43e43b253b	小阳看天下
+gh_d5f935d0d1f2	半仙社评
+gh_171cec079b2a	观察家王小姐
+gh_be8c29139989	心灵书局
+gh_c91b42649690	心理调色盘
+gh_93e00e187787	小惠爱厨房
+gh_744cb16f6e16	美味在人间
+gh_9877c8541764	退休老年圈
+gh_0c89e11f8bf3	幸福启示
+gh_6d205db62f04	指尖奇文
+gh_7bca1c99aea0	慧行社
+gh_c69776baf2cd	老友欢聚地
+gh_234ef02cdee5	姜子丫
+gh_56a6765df869	婉央女子
+gh_e2576b7181c6	六八评价
+gh_40a0ad154478	所见畅谈
+gh_34318194fd0e	老新说事
+gh_901b0d722749	壹姐八卦
+gh_3c7d38636846	圈内侃八卦
+gh_01f8afd03366	奇闻有约
+gh_a307072c04b9	生活智慧正能量
+gh_424c8eeabced	爱姨生活妙招
+gh_671f460c856c	日日有妙招
+gh_b9b99173ff8a	实在妙招
+gh_e9d819f9e147	热血军中事
+gh_da76772d8d15	娱乐在前
+gh_bd57b6978e06	八点说故事
+gh_6b7c2a257263	幸福晚年知音
+gh_bfe5b705324a	奇趣百味生活
+gh_29074b51f2b7	老来生活家
+gh_0921c03402cd	俏生活秘籍
+gh_7e5818b2dd83	便捷生活好方法
+gh_89ef4798d3ea	生活百态观
+gh_bff0bcb0694a	喜乐生活派
+gh_a2901d34f75b	畅聊奇闻
+gh_b15de7c99912	人生百事观
+gh_73be0287bb94	军莫愁
+gh_56ca3dae948c	老友闲谈
+gh_a182cfc94dad	冀中轶事
+gh_a6351b447819	冀中精彩生活
+gh_3df10391639c	冀中生活谈
+gh_e75dbdc73d80	票圈正能量
+gh_5e543853d8f0	票圈精彩
+gh_f4594783f5b8	俏丽音乐相册
+gh_3845af6945d0	新品女装特价
+gh_b3ffc1ca3a04	票圈内容精选
+gh_efaf7da157f5	票圈热议

File diff suppressed because it is too large
+ 2 - 6
dev/test.py


File diff suppressed because it is too large
+ 13 - 0
dev/title_to_search.py


Some files were not shown because too many files changed in this diff