zhangyong 4 月之前
父節點
當前提交
e3794cbeaa
共有 4 個文件被更改,包括 229 次插入48 次删除
  1. 53 45
      common/feishu_form.py
  2. 150 0
      common/gpt4o_mini_help.py
  3. 6 0
      common/sql_help.py
  4. 20 3
      video_rewriting/video_processor.py

+ 53 - 45
common/feishu_form.py

@@ -101,16 +101,17 @@ class Material():
                 tags = row[3]
                 piaoquan_id = row[4]
                 number = row[5]
-                video_share = row[6]
-                video_ending = row[7]
-                voice = row[8]
-                crop_tool = row[9]
-                gg_duration = row[10]
-                title = row[11]
+                limit_number = row[6]
+                video_share = row[7]
+                video_ending = row[8]
+                voice = row[9]
+                crop_tool = row[10]
+                gg_duration = row[11]
+                title = row[12]
                 if channel_url == None or channel_url == "" or len(channel_url) == 0:
                     continue
-                first_category = row[13]  # 一级品类
-                secondary_category = row[14]  # 二级品类
+                first_category = row[14]  # 一级品类
+                secondary_category = row[15]  # 二级品类
                 def count_items(item, separator):
                     if item and item not in {'None', ''}:
                         return len(item.split(separator))
@@ -121,10 +122,10 @@ class Material():
                 values = [channel_id, video_id_total, piaoquan_id, video_share, video_ending_total, crop_tool, gg_duration, title_total, first_category]
                 filtered_values = [str(value) for value in values if value is not None and value != "None"]
                 task_mark = "_".join(map(str, filtered_values))
-                keyword_sort = row[15]  # 排序条件
-                keyword_time = row[16]  # 发布时间
-                keyword_duration = row[17]  # 视频时长
-                keyword_name = row[18]  # 负责人
+                keyword_sort = row[16]  # 排序条件
+                keyword_time = row[17]  # 发布时间
+                keyword_duration = row[18]  # 视频时长
+                keyword_name = row[19]  # 负责人
                 keyword_sort_list = keyword_sort.split(',')
                 keyword_duration_list = keyword_duration.split(',')
                 keyword_time_list = keyword_time.split(',')
@@ -151,7 +152,8 @@ class Material():
                             "secondary_category": secondary_category,  # 二级品类
                             "combo": combo,  # 搜索条件
                             "keyword_name": keyword_name,  # 品类负责人
-                            "tags": tags
+                            "tags": tags,
+                            "limit_number":limit_number
 
                         }
                         processed_list.append(json.dumps(number_dict, ensure_ascii=False))
@@ -175,20 +177,21 @@ class Material():
                 tags = row[3]
                 piaoquan_id = row[4]
                 number = row[5]
-                video_share = row[6]
-                video_ending = row[7]
-                voice = row[8]
-                crop_tool = row[9]
-                gg_duration = row[10]
-                title = row[11]
+                limit_number = row[6]
+                video_share = row[7]
+                video_ending = row[8]
+                voice = row[9]
+                crop_tool = row[10]
+                gg_duration = row[11]
+                title = row[12]
                 if channel_url == None or channel_url == "" or len( channel_url ) == 0:
                     continue
                 try:
-                    ls_number = int( row[12] )
+                    ls_number = int( row[13] )
                 except:
                     ls_number = None
-                first_category = row[13]
-                name = row[14]
+                first_category = row[14]
+                name = row[15]
 
                 def count_items(item, separator):
                     if item and item not in {'None', ''}:
@@ -222,7 +225,8 @@ class Material():
                             "voice": voice,
                             "first_category": first_category,  # 一级品类
                             "keyword_name":name,
-                            "tags": tags
+                            "tags": tags,
+                            "limit_number":limit_number
                         }
                         processed_list.append( json.dumps( number_dict, ensure_ascii=False ) )
                         if channel_id == "抖音" or channel_id == "快手" or channel_id == "视频号":
@@ -234,14 +238,14 @@ class Material():
                                 if channel_id == "视频号":
                                     new_channel_id = "视频号历史"
 
-                                values1 = [new_channel_id, video_id_total, piaoquan_id, video_share, video_ending_total,
-                                           crop_tool,
-                                           gg_duration, title_total]
-                                filtered_values1 = [str( value ) for value in values1 if
-                                                    value is not None and value != "None"]
-                                task_mark1 = "_".join( map( str, filtered_values1 ) )
+                                # values1 = [new_channel_id, video_id_total, piaoquan_id, video_share, video_ending_total,
+                                #            crop_tool,
+                                #            gg_duration, title_total]
+                                # filtered_values1 = [str( value ) for value in values1 if
+                                #                     value is not None and value != "None"]
+                                # task_mark1 = "_".join( map( str, filtered_values1 ) )
                                 number_dict = {
-                                    "task_mark": task_mark1,
+                                    "task_mark": task_mark,
                                     "channel_id": new_channel_id,
                                     "channel_url": user,
                                     "piaoquan_id": piaoquan_id,
@@ -254,7 +258,8 @@ class Material():
                                     "voice": voice,
                                     "first_category": first_category,  # 一级品类
                                     "keyword_name": name,
-                                    "tags": tags
+                                    "tags": tags,
+                                    "limit_number":limit_number
                                 }
                                 processed_list.append( json.dumps( number_dict, ensure_ascii=False ) )
                 else:
@@ -280,16 +285,17 @@ class Material():
                 tags = row[3]
                 piaoquan_id = row[4]
                 number = row[5]
-                video_share = row[6]
-                video_ending = row[7]
-                voice = row[8]
-                crop_tool = row[9]
-                gg_duration = row[10]
-                title = row[11]
+                limit_number = row[6]
+                video_share = row[7]
+                video_ending = row[8]
+                voice = row[9]
+                crop_tool = row[10]
+                gg_duration = row[11]
+                title = row[12]
                 if channel_url == None or channel_url == "" or len(channel_url) == 0:
                     continue
                 try:
-                    ls_number = int(row[12])
+                    ls_number = int(row[13])
                 except:
                     ls_number = None
                 def count_items(item, separator):
@@ -320,7 +326,8 @@ class Material():
                             "crop_total": crop_tool,
                             "gg_duration_total": gg_duration,
                             "voice": voice,
-                            "tags":tags
+                            "tags":tags,
+                            "limit_number":limit_number
                         }
                         processed_list.append(json.dumps(number_dict, ensure_ascii=False))
                         if channel_id == "抖音" or channel_id == "快手" or channel_id == "视频号":
@@ -332,12 +339,12 @@ class Material():
                                 if channel_id == "视频号":
                                     new_channel_id = "视频号历史"
 
-                                values1 = [new_channel_id, video_id_total, piaoquan_id, video_share, video_ending_total, crop_tool,
-                                          gg_duration, title_total]
-                                filtered_values1 = [str(value) for value in values1 if value is not None and value != "None"]
-                                task_mark1 = "_".join(map(str, filtered_values1))
+                                # values1 = [new_channel_id, video_id_total, piaoquan_id, video_share, video_ending_total, crop_tool,
+                                #           gg_duration, title_total]
+                                # filtered_values1 = [str(value) for value in values1 if value is not None and value != "None"]
+                                # task_mark1 = "_".join(map(str, filtered_values1))
                                 number_dict = {
-                                    "task_mark": task_mark1,
+                                    "task_mark": task_mark,
                                     "channel_id": new_channel_id,
                                     "channel_url": user,
                                     "piaoquan_id": piaoquan_id,
@@ -348,7 +355,8 @@ class Material():
                                     "crop_total": crop_tool,
                                     "gg_duration_total": gg_duration,
                                     "voice": voice,
-                                    "tags": tags
+                                    "tags": tags,
+                                    "limit_number":limit_number
                                 }
                                 processed_list.append(json.dumps(number_dict, ensure_ascii=False))
                 else:

+ 150 - 0
common/gpt4o_mini_help.py

@@ -0,0 +1,150 @@
+import json
+
+import requests
+class GPT4oMini:
+    """Helpers that ask the GPT gateway at ``API_URL`` to write video copy.
+
+    Both public helpers are best-effort: any failure while calling the
+    gateway or reading its answer (connection error, timeout, non-2xx
+    status, unexpected response shape) yields a fixed fallback text instead
+    of an exception.
+    """
+
+    API_URL = "http://aigc-api.cybertogether.net//aigc/dev/test/gpt"
+    MODEL = "gpt-4o-mini-2024-07-18"
+    # Upper bound (seconds) for one gateway call. Without a timeout a stalled
+    # connection would block the calling worker indefinitely.
+    REQUEST_TIMEOUT = 60
+    # Fallback ending script returned when the gateway call fails.
+    DEFAULT_PW_TEXT = '老友们,这个视频说的太好了,请你在看完之后,不要私藏,分享给身边的亲人朋友,转发传递这份美好,这不仅是分享一个好视频,更是在传递一份深厚的祝福和好运,愿每个看到这消息的人,都能收获健康长寿和平安,希望您在看完后能够回传给我,并且把这条视频再传给3个群和3个好友,祝愿你我,健健康康,平平安安,因为3个数字代表着健康和平安,这么好的视频,千万不要在你的手里终止了,分享给群友们,他们看到以后一定会感谢你的,感谢分享,感谢传递'
+    # Fallback title returned when the gateway call fails.
+    DEFAULT_TITLE = "这个视频,分享给我的老友,祝愿您能幸福安康"
+
+    @classmethod
+    def _request_field(cls, prompt, field, description):
+        """POST *prompt* to the gateway and return the value stored under *field*.
+
+        The request asks for a strict JSON-schema response whose only
+        property is *field* (documented to the model by *description*).
+
+        Raises whatever the HTTP call, status check or JSON decoding raise,
+        and ``KeyError`` when *field* is missing; the public helpers turn
+        any of these into their fallback text.
+        """
+        payload = json.dumps({
+            "imageList": [],
+            "model": cls.MODEL,
+            "prompt": prompt,
+            "responseFormat": {
+                "type": "json_schema",
+                "json_schema": {
+                    "strict": True,
+                    "name": "share_script_result",
+                    "schema": {
+                        "type": "object",
+                        "properties": {
+                            field: {
+                                "type": "string",
+                                "description": description
+                            }
+                        },
+                        "required": [field],
+                        "additionalProperties": False
+                    }
+                }
+            }
+        })
+        response = requests.post(
+            cls.API_URL,
+            headers={'Content-Type': 'application/json'},
+            data=payload,
+            timeout=cls.REQUEST_TIMEOUT,
+        )
+        # Treat HTTP error statuses as failures instead of parsing their body.
+        response.raise_for_status()
+        # The 'data' member of the response is itself parsed as a JSON document.
+        data = json.loads(response.json().get('data', '{}'))
+        return data[field]
+
+    @classmethod
+    def get_ai_mini_pw(cls, title):
+        """Return an ending "please share" script generated for *title*.
+
+        :param title: video title interpolated into the prompt.
+        :return: the generated script, or ``DEFAULT_PW_TEXT`` on any failure.
+        """
+        prompt = (
+            "请针对微信平台视频类小程序场景,面向人群是中国中老年人,在单聊、群聊场景,"
+            "根据以下示例生成结尾引导分享的脚本,引导用户发生更多的分享行为。\n"
+            "示例case1:\n"
+            "标题: 几十年前的老歌,现在很少听到了!\n"
+            "返回分享引导脚本: 这个视频真是难得,太美太好听了,看完真是回忆满满,"
+            "让人怀念那个时代,多么淳朴多么美好,今天大家都在群里转发这个视频,"
+            "看了都说,那时候虽然手里不宽裕,但心里都满怀希望,那股子拼劲,真是让人热血沸腾,"
+            "老朋友你还记得吗?如果你也跟我一样深深的怀念,赶紧把这个视频转发到你所有群里去吧,"
+            "让大家伙都看看吧,多多转发,感恩有你!\n\n"
+            "示例case2:\n"
+            "标题: 🔺令人尊敬的老人❗❗这才是我们应该追的明星❗\n"
+            "返回分享引导脚本: 这个视频发出来5天了,转发的人却寥寥无几,"
+            "试想一次明星出轨就全民皆知,这样中国真正的正能量却无人关注,真是太讽刺了!"
+            "如果你看到了这个视频,请分享到你加入的每个群,向杨老这种无私奉献的大爱致敬!"
+            "千万别让那些低调做好事的人心寒!转发传递下去,让世界都看到,中华民族的正能量!\n\n"
+            f"请分析该标题,标题为:{title},返回结尾分享引导脚本。"
+        )
+        try:
+            return cls._request_field(prompt, "分享引导脚本", "生成的结尾分享引导脚本")
+        except Exception:
+            # Deliberate best-effort: never let a gateway problem stop the caller.
+            return cls.DEFAULT_PW_TEXT
+
+    @classmethod
+    def get_ai_mini_title(cls, title):
+        """Return a rewritten, more engaging title generated from *title*.
+
+        :param title: original video title interpolated into the prompt.
+        :return: the generated title, or ``DEFAULT_TITLE`` on any failure.
+        """
+        # NOTE(review): the adjacent literals below are concatenated without
+        # separators, so the prompt reaches the model as one unbroken line.
+        # Kept byte-for-byte to preserve current model behaviour.
+        prompt = (
+            "CONTEXT"
+            "你是一名短视频标题优化专家,任务是为短视频生成吸引力高且符合规范的标题。"
+            "OBJECTIVE"
+            "基于优秀标题要求及示例,生成新的、吸引用户注意的标题。"
+            "TIME"
+            "在标题生成请求提出后的1分钟内生成。"
+            "ACTOR"
+            "短视频标题优化专家"
+            "RESOURCES"
+            "- 原标题列表"
+            "- 优秀标题示例"
+            "- 标题生成规范和约束条件"
+            "RESPONSE"
+            "为每个原标题生成符合规范的新标题。"
+            "生成示例标题案例及要求"
+            "1. 标题开头包含醒目emoji🔴,整体字符长度必须控制在10-25个字"
+            "2. 识别需要修改的标题中可能感兴趣的人群,并在标题中表现出来。人群示例如:群友们、退休人员、50~70后等"
+            "3. 标题中可增加一些对内容的观点/态度,用人格化的方式表达,示例:太香了、老外至今难以相信"
+            "4. 标题结尾可以根据标题内容增加一些引导语,格式参考示例但不必局限于示例。示例:你们见过吗、你听对不对、说的太好了、请听、太神奇了"
+            "5. 对于包含#话题和@人的标题,若标题中包含其他元素,则去除#话题和@人元素后利用其他元素生成标题。若标题去除#话题和@人外无其他元素,则仅利用#话题的内容生成标题"
+            "Goodcase示例:"
+            "⭕老外至今难以相信,中国人竟能把大桥建到天上,穿入云中"
+            "🔴未来酒店体验,群友们,请看!"
+            "⭕六七十年代的《忠字舞》,你们见过吗?"
+            "🔴哈哈哈!大哥说的太好了!太真实了"
+            "🔴今天,请记住那1700个集体赴死的年轻人,平均23岁!"
+            "🔴这才叫老同学聚会,到了这个年纪,还能聚在一起真不容易!"
+            "🔴百善孝为先,心凉了捂不热了"
+            "🔴养儿不如养狗,一件真实的事!"
+            "🔴让人受益的一段话,写得真好!"
+            "🔴“处暑三劝”!发给最好的朋友劝一劝!"
+            "🔴世间公道自在人心,善恶有报,人生智慧建议收藏!"
+            "🔴坐着电梯登上山顶!这怕是只有中国人敢想敢做!"
+            "🔴人老了!就应该这样去活!"
+            "生成约束"
+            "1. 标题不能包含#话题标签和@人名。"
+            "2. 不能编造:不能加入原标题没有的实体信息,如原标题没有养老金,不能在生成的标题中出现养老金。"
+            "3. 标题内不能使用强引导分享点击的词句,如:快来看看、大家都听一听、值得一看、都看看吧、你也来看看吧、大家注意、都听听等。"
+            "4. 不能使用无实质信息和强烈诱导点击、紧急、夸张、震惊的描述,避免使用“震惊国人”、“速看”、“太震撼了”等类似描述。"
+            "5. 标题需要简洁、清晰,不要使用网络流行语,如:太燃了、佛系、躺平、内卷等。"
+            "badcase示例:"
+            "🌸绝对不能错过,快打开看看,越快越好"
+            "所有老年人一定要看"
+            "天大的好消息,5月开始实施❗"
+            "就在刚刚,中国突然传出重磅消息,所有人都不敢相信! 🚩"
+            "丧尽天良!为什么生病的人越来越多,原来吃的是这些 🎈"
+            "今年的端午节太特殊,一辈子难遇一次!一定要看!错过别后悔"
+            "好消息来了,千万别划走!"
+            "紧急!已爆发,错过就晚了😱"
+            f"请分析该标题,标题为:{title},返回新的标题。"
+        )
+        try:
+            return cls._request_field(prompt, "新标题", "生成新的标题")
+        except Exception:
+            # Deliberate best-effort: never let a gateway problem stop the caller.
+            return cls.DEFAULT_TITLE
+
+if __name__ == '__main__':
+    # Manual smoke run: print the generated title so the outcome of the call
+    # is visible on the console instead of being discarded.
+    print(GPT4oMini.get_ai_mini_title("🔴这位美女说的太好了!这就是我们的大中国"))

+ 6 - 0
common/sql_help.py

@@ -102,6 +102,12 @@ class sqlCollect():
         count = MysqlHelper.get_values(sql, (str(name)))
         return count
 
+    @classmethod
+    def get_mark_count(cls, mark):
+        """Count today's ``machine_making_data`` rows recorded for one task mark.
+
+        Only rows whose ``data_time`` falls on the current date and whose hour
+        is not later than the current hour are counted.
+
+        :param mark: task mark compared against ``task_mark`` (stringified).
+        :return: the result of ``MysqlHelper.get_values`` unchanged; the caller
+            in video_processor.py reads the count as ``result[0][0]``.
+        """
+        # NOTE(review): ``(str(mark))`` is a plain string, not a 1-tuple. It is
+        # kept as written to match the sibling queries - confirm what
+        # MysqlHelper.get_values expects as its parameter argument.
+        query = """SELECT count(0) FROM machine_making_data where task_mark = %s  and  DATE(data_time) = CURRENT_DATE AND HOUR(data_time) <= HOUR(NOW());"""
+        return MysqlHelper.get_values(query, (str(mark)))
+
     @classmethod
     def get_bygj_all_count(cls):
         sql = """SELECT count(0) FROM machine_making_data where task_mark = %s  and  DATE(data_time) = CURRENT_DATE AND HOUR(data_time) <= HOUR(NOW());"""

+ 20 - 3
video_rewriting/video_processor.py

@@ -7,6 +7,7 @@ import shutil
 import time
 from datetime import datetime
 
+from common.gpt4o_mini_help import GPT4oMini
 from common.odps_data import OdpsDataCount
 from common.redis import get_data, get_first_value_with_prefix, increment_key
 from common.tag_video import Tag
@@ -154,7 +155,15 @@ class VideoProcessor:
             Common.logger(mark).info(f"{name}的{task_mark}下的ID{url} 获取视频完成,共{len(data_list)}条")
             try:
                 for video in data_list:
-
+                    limit_number = task["limit_number"]
+                    if limit_number:
+                        task_mark = task["task_mark"]
+                        makr_count = sqlCollect.get_mark_count(task_mark)
+                        if int(limit_number) <= int(makr_count[0][0]):
+                            AliyunLogger.logging((task["channel_id"]), name, task["channel_url"], '',
+                                                 f"{task_mark}标识任务每日指定条数已足够,指定条数{limit_number},实际生成条数{int(makr_count[0][0])}",
+                                                 "1111")
+                            return
                     cls.remove_files(mark)
                     video_path_url = cls.create_folders(mark)
                     new_title = cls.generate_title(video, title)
@@ -486,7 +495,7 @@ class VideoProcessor:
             if not new_title:
                 new_title = '这个视频,分享给我的老友,祝愿您能幸福安康'
             else:
-                new_title = GPT4o.get_ai_title(new_title)
+                new_title = GPT4oMini.get_ai_mini_title(new_title)
         else:
             titles = title.split('/') if '/' in title else [title]
             new_title = random.choice(titles)
@@ -539,7 +548,7 @@ class VideoProcessor:
         处理视频片尾
         """
         if video_ending == "AI片尾引导":
-            pw_srt_text = GPT4o.get_ai_pw(old_title)
+            pw_srt_text = GPT4oMini.get_ai_mini_pw(old_title)
             if pw_srt_text:
 
                 pw_url = TTS.get_pw_zm(pw_srt_text, voice)
@@ -665,6 +674,14 @@ class VideoProcessor:
             return
         task = json.loads(data)
         try:
+            limit_number = task["limit_number"]
+            if limit_number:
+                task_mark = task["task_mark"]
+                makr_count = sqlCollect.get_mark_count(task_mark)
+                if int(limit_number) <= int(makr_count[0][0]):
+                    AliyunLogger.logging((task["channel_id"]), name, task["channel_url"], '', f"{task_mark}标识任务每日指定条数已足够,指定条数{limit_number},实际生成条数{int(makr_count[0][0])}",
+                                         "1111")
+                    return
             if mark == 'dy-pl-gjc' and task['channel_id'] == '抖音搜索':
                 mark_count = 'dyss-count'
                 count = get_first_value_with_prefix(mark_count)