Ver código fonte

opt asr_res: stop_words, '\n'

liqian 2 anos atrás
pai
commit
d663daae11
2 arquivos alterados com 10 adições e 7 exclusões
  1. 3 0
      config.py
  2. 7 7
      gpt_process.py

+ 3 - 0
config.py

@@ -43,6 +43,9 @@ class BaseConfig(object):
     # 记录生成标题重试次数
     TITLE_GENERATE_RETRY_KEY_NAME_PREFIX = 'title:generate:retry:count:'
 
+    # stop_words
+    STOP_WORDS = ['啊', '嗯', '呗', '呢', '呐', '呀', '唉', '哎', '额', '呃', '哦', '呵']
+
     # video tags
     TAGS = ['舞蹈', '美食', '时尚', '旅行', '音乐', '运动', '影视', '搞笑', '科技', '综艺',
             '游戏', '情感', '健康', '人文', '社会', '热点', '财富', '生活']

+ 7 - 7
gpt_process.py

@@ -81,16 +81,16 @@ def title_generate(video_id, video_path):
     })
     # 4. gpt产出结果
     # log_.info(f"debug: title_generate 4")
-    prompt = f"{config_.GPT_PROMPT['title']['prompt2']}{asr_res.strip()}"
+    # 对asr结果进行清洗
+    asr_res = asr_res.strip().replace('\n', '')
+    for stop_word in config_.STOP_WORDS:
+        asr_res = asr_res.replace(stop_word, '')
+    # token限制: 字数 <= 2500
+    asr_res = asr_res[-2500:]
+    prompt = f"{config_.GPT_PROMPT['title']['prompt2']}{asr_res}"
     gpt_res = request_gpt(prompt=prompt)
 
     return gpt_res, generate_filepath
-    # except ConnectionResetError:
-    #     log_.info(video_id)
-    # except Exception as e:
-    #     log_.info(traceback.format_exc())
-    # else:
-    #     print(gpt_res)
 
 
 if __name__ == '__main__':