Forráskód Böngészése

2024-0511-上线 kimi 修改标题能力

罗俊辉 1 éve
szülő
commit
0bfd15c1cf

+ 43 - 5
applications/functions/common.py

@@ -147,6 +147,7 @@ class MySQLServer(object):
     """
     MySql 服务
     """
+
     @classmethod
     def select_download_videos(cls, trace_id):
         """
@@ -154,7 +155,7 @@ class MySQLServer(object):
         :param trace_id:
         :return:
         """
-        sql = "select video_id, video_title from crawler_video where out_user_id = '{}' limit 5;".format(trace_id)
+        sql = "select video_id from crawler_video where out_user_id = '{}' and video_title = '{}';".format(trace_id, trace_id)
         connection = pymysql.connect(
             host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # 数据库IP地址,内网地址
             port=3306,  # 端口号
@@ -256,6 +257,7 @@ class KimiServer(object):
     """
     Kimi Server
     """
+
     @classmethod
     def ask_kimi(cls, question):
         """
@@ -313,7 +315,7 @@ class KimiServer(object):
             if not title:
                 result = {}
             else:
-                result =  cls.ask_kimi(title)
+                result = cls.ask_kimi(title)
             logging(
                 code="2001",
                 info="kimi-result",
@@ -324,6 +326,42 @@ class KimiServer(object):
             with open(save_path, "w", encoding="utf-8") as f:
                 f.write(json.dumps(result, ensure_ascii=False))
 
-
-
-
+    @classmethod
+    def kimi_title(cls, ori_title):
+        """
+        Ask Kimi (Moonshot chat API) to rewrite ori_title, using a fixed title-writing prompt.
+        :param ori_title: original title text; it is sent first, followed by the prompt
+        :return: message content of the first choice in the chat completion response
+        """
+        single_title_prompt = """
+        请将以上标题改写成适合小程序点击和传播的小程序标题,小程序标题的写作规范如下,请学习后进行小程序标题的编写。直接输出最终的小程序标题
+        小程序标题写作规范:
+        1.要点前置:将最重要的信息放在标题的最前面,以快速吸引读者的注意力。例如,“5月一辈子同学,三辈子亲,送给我的老同学,听哭无数人!”中的“5月”和“一辈子同学,三辈子亲”都是重要的信息点。
+        2.激发情绪:使用能够触动人心的语言,激发读者的情感共鸣。如“只剩两人同学聚会,看后感动落泪。”使用“感动落泪”激发读者的同情和怀旧情绪。
+        3.使用数字和特殊符号:数字可以提供具体性,而特殊符号如“🔴”、“😄”、“🔥”等可以吸引视觉注意力,增加点击率。
+        4.悬念和好奇心:创建悬念或提出问题,激发读者的好奇心。例如,“太神奇了!长江水位下降,重庆出现惊奇一幕!”中的“惊奇一幕”就是一个悬念。
+        5.名人效应:如果内容与知名人士相关,提及他们的名字可以增加标题的吸引力。
+        6.社会价值观:触及读者的文化和社会价值观,如家庭、友情、国家荣誉等。
+        7.标点符号的运用:使用感叹号、问号等标点来增强语气和情感表达。
+        8.直接的语言:使用直白、口语化的语言,易于理解,如“狗屁股,笑死我了!”。
+        9.热点人物或事件:提及当前的热点人物或事件,利用热点效应吸引读者。
+        10.字数适中:保持标题在10-20个字之间,既不过长也不过短,确保信息的完整性和吸引力。
+        11.适当的紧迫感:使用“最新”、“首次”、“紧急”等词汇,创造一种紧迫感,促使读者立即行动。
+        12.情感或价值诉求:使用如“感动”、“泪目”、“经典”等词汇,直接与读者的情感或价值观产生共鸣。
+        避免误导:确保标题准确反映内容,避免夸大或误导读者。
+        """  # Chinese prompt: rewrite "the title above" as a mini-program title, following the listed writing rules
+        client = OpenAI(
+            api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',  # NOTE(review): secret committed in source; load from env/config and rotate this key
+            base_url="https://api.moonshot.cn/v1"
+        )
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": ori_title + "\n" + single_title_prompt,  # title goes first because the prompt refers to "the title above"
+                }
+            ],
+            model="moonshot-v1-8k",
+        )
+        response = chat_completion.choices[0].message.content  # only the first choice is used
+        return response

+ 4 - 2
applications/routes.py

@@ -57,7 +57,8 @@ async def search_videos_from_the_web():
             )
         else:
             KimiServer().ask_kimi_and_save_to_local((title, trace_id, title_p))
-        await asyncio.sleep(2)
+        await asyncio.sleep(1)
+        kimi_title = KimiServer().kimi_title(title)
         search_videos(
             title=title,
             video_path=title_p,
@@ -66,7 +67,8 @@ async def search_videos_from_the_web():
         )
         res = {
             "trace_id": trace_id,
-            "code": 0
+            "code": 0,
+            "kimi_title": kimi_title
         }
     except Exception as e:
         res = {

+ 10 - 7
applications/schedule/process_schedule.py

@@ -2,10 +2,13 @@
 @author: luojunhui
 对请求进行操作
 """
+import json
 import time
+import os
 
 from applications.match_alg import best_choice
-from applications.functions.common import *
+from applications.functions.common import Functions
+from applications.functions.log import logging
 
 
 class ProcessParams(object):
@@ -38,6 +41,7 @@ class ProcessParams(object):
         """执行代码"""
         params = self.get_params(data)
         title = params['title']
+        kimi_title = params['kimi_title']
         # account_name = params['accountName']
         # ghId = params['ghId']
         video_list = params['videoList']
@@ -46,14 +50,12 @@ class ProcessParams(object):
 
         with open(title_p, encoding="utf-8") as f:
             params_obj = json.loads(f.read())
-        s = time.time()
+
         best_video_id = await best_choice(
             params_obj=params_obj,
             trace_id=self.trace_id,
             search_videos=video_list
         )
-        e = time.time()
-        print(e - s)
         logging(
             code="1002",
             info="best video_id --{}".format(best_video_id),
@@ -63,16 +65,17 @@ class ProcessParams(object):
 
         if best_video_id:
             print(best_video_id)
-            response = request_for_info(best_video_id)
+            response = Functions().request_for_info(best_video_id)
             productionCover = response['data'][0]['shareImgPath']
-            productionName = response["data"][0]['title']
+            # productionName = response["data"][0]['title']
+            productionName = kimi_title
             videoUrl = response['data'][0]['videoPath']
             user_id = response['data'][0]['user']['uid']
             programAvatar = "/static/logo.png"
             programId = "wx0b7d95eb293b783b"
             programName = "天天美好祝福好生活"
             source = "Web"
-            root_share_id, productionPath = create_gzh_path(video_id=best_video_id, shared_uid=user_id)
+            root_share_id, productionPath = Functions().create_gzh_path(video_id=best_video_id, shared_uid=user_id)
             logging(
                 code="1002",
                 info="root_share_id --{}, productionPath -- {}".format(root_share_id, productionPath),

+ 46 - 45
applications/schedule/search_schedule.py

@@ -10,7 +10,6 @@ from applications.functions.mq import MQ
 from applications.functions.log import logging
 from applications.static.config import gh_id_dict
 from applications.functions.item import VideoItem
-from applications.functions.common import Functions
 
 
 def wx_search(keys):
@@ -32,11 +31,10 @@ def wx_search(keys):
     return response.json()
 
 
-def process_weixin_video_obj(video_obj, user, trace_id, title):
+def process_weixin_video_obj(video_obj, user, trace_id):
     """
     异步处理微信 video_obj
     公众号和站内账号一一对应
-    :param title:
     :param trace_id:
     :param user:
     :param video_obj:
@@ -49,7 +47,7 @@ def process_weixin_video_obj(video_obj, user, trace_id, title):
     item.add_video_info("user_id", user["uid"])
     item.add_video_info("user_name", user["nick_name"])
     item.add_video_info("video_id", video_obj['hashDocID'])
-    item.add_video_info("video_title", title)
+    item.add_video_info("video_title", trace_id)
     item.add_video_info("publish_time_stamp", int(publish_time_stamp))
     item.add_video_info("video_url", video_obj["videoUrl"])
     item.add_video_info("cover_url", video_obj["image"])
@@ -88,20 +86,22 @@ def return_video(video_path, title, trace_id):
             )
         else:
             obj_list = title_result['data']['data']
-            for obj in obj_list:
-                try:
-                    title = obj['items'][0]['title'].replace('<em class=\"highlight\">', '').replace('</em>',
-                                                                                                 '').replace("#",
-                                                                                                        "")
-                    if Functions().sensitive_flag(title):
-                        return obj
-                    else:
-                        continue
-                except Exception as e:
-                    print(e)
-                    continue
+            if obj_list:
+                return obj_list[0]
+            # for obj in obj_list:
+            #     try:
+            #         title = obj['items'][0]['title'].replace('<em class=\"highlight\">', '').replace('</em>',
+            #                                                                                      '').replace("#",
+            #                                                                                             "")
+            #         if Functions().sensitive_flag(title):
+            #             return obj
+            #         else:
+            #             continue
+            #     except Exception as e:
+            #         print(e)
+            #         continue
 
-        # search_keys
+        # # search_keys
         search_keys_result = wx_search(keys=my_obj['search_keys'][0])
         if search_keys_result['msg'] == '未知错误':
             logging(
@@ -111,18 +111,20 @@ def return_video(video_path, title, trace_id):
             )
         else:
             obj_list = search_keys_result['data']['data']
-            for obj in obj_list:
-                try:
-                    title = obj['items'][0]['title'].replace('<em class=\"highlight\">', '').replace('</em>',
-                                                                                                     '').replace("#",
-                                                                                                                 "")
-                    if Functions().sensitive_flag(title):
-                        return obj
-                    else:
-                        continue
-                except Exception as e:
-                    print(e)
-                    continue
+            if obj_list:
+                return obj_list[0]
+        #     for obj in obj_list:
+        #         try:
+        #             title = obj['items'][0]['title'].replace('<em class=\"highlight\">', '').replace('</em>',
+        #                                                                                              '').replace("#",
+        #                                                                                                          "")
+        #             if Functions().sensitive_flag(title):
+        #                 return obj
+        #             else:
+        #                 continue
+        #         except Exception as e:
+        #             print(e)
+        #             continue
 
         # theme
         theme_result = wx_search(keys=my_obj['theme'])
@@ -134,18 +136,20 @@ def return_video(video_path, title, trace_id):
             )
         else:
             obj_list = theme_result['data']['data']
-            for obj in obj_list:
-                try:
-                    title = obj['items'][0]['title'].replace('<em class=\"highlight\">', '').replace('</em>',
-                                                                                                     '').replace("#",
-                                                                                                                 "")
-                    if Functions().sensitive_flag(title):
-                        return obj
-                    else:
-                        continue
-                except Exception as e:
-                    print(e)
-                    continue
+            if obj_list:
+                return obj_list[0]
+            # for obj in obj_list:
+            #     try:
+            #         title = obj['items'][0]['title'].replace('<em class=\"highlight\">', '').replace('</em>',
+            #                                                                                          '').replace("#",
+            #                                                                                                      "")
+            #         if Functions().sensitive_flag(title):
+            #             return obj
+            #         else:
+            #             continue
+            #     except Exception as e:
+            #         print(e)
+            #         continue
         return None
     else:
         logging(
@@ -173,13 +177,10 @@ def search_videos(video_path, title, trace_id, gh_id):
             trace_id=trace_id,
             data=video_obj
         )
-        title = video_obj['items'][0]['title'].replace('<em class=\"highlight\">', '').replace('</em>', '').replace("#",
-                                                                                                                    "")
         process_weixin_video_obj(
             video_obj=video_obj['items'][0],
             user=gh_id_dict.get(gh_id),
-            trace_id=trace_id,
-            title=title
+            trace_id=trace_id
         )
     else:
         logging(

+ 0 - 19
applications/search/xigua_search.py

@@ -129,25 +129,6 @@ def extract_info_by_re(text):
     }
 
 
-def byte_dance_cookie(item_id):
-    """
-    Get the Xigua Video cookie: returns the first cookie value from the ttwid register response.
-    :param item_id: interpolated into the ixigua.com URL sent as the 'referer' header
-    """
-    sess = requests.Session()
-    sess.headers.update({
-        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
-        'referer': 'https://www.ixigua.com/home/{}/'.format(item_id),
-    })
-
-    # Get cookies
-    sess.get('https://i.snssdk.com/slardar/sdk.js?bid=xigua_video_web_pc')
-    data = '{"region":"cn","aid":1768,"needFid":false,"service":"www.ixigua.com","migrate_info":{"ticket":"","source":"node"},"cbUrlProtocol":"https","union":true}'
-    r = sess.post('https://ttwid.bytedance.com/ttwid/union/register/', data=data)
-    # print(r.text)
-    return r.cookies.values()[0]
-
-
 def get_video_info(item_id):
     """
     获取视频信息

+ 1 - 1
applications/static/config.py

@@ -299,4 +299,4 @@ gh_id_dict = {
         "uid": 69637480,
         "nick_name": "风间"
     }
-}
+}