罗俊辉 1 anno fa
parent
commit
fa1701a694
3 file modificati con 63 aggiunte e 9 eliminazioni
  1. 2 3
      applications/pipeline.py
  2. 3 6
      deal/matchArticle_deal.py
  3. 58 0
      spider/baidu_imgs.py

+ 2 - 3
applications/pipeline.py

@@ -80,7 +80,7 @@ def generate_text(question):
     生成一篇小文章,文章需要有逻辑,有参考意义
     """
     text = kimi_ai(prompt)
-    return text
+    return text.replace("\n", "").replace("*", "").replace("#", "").replace(":", "").replace('"', "")
 
 
 # 第三步,清洗,筛选,总结, 生成文章
@@ -90,7 +90,6 @@ def summary_articles(materials):
     :param materials:
     :return:
     """
-    img_list = []
     materials_ = json.loads(materials.replace("\n", "").replace("#", ""))
     keys = []
     for key in materials_:
@@ -131,4 +130,4 @@ def summary_articles(materials):
         response = json.loads(response.replace("\n", ""))
     except:
         response = json.loads(response.replace("'", '"'))
-    return img_list, response['title'], response['text']
+    return response['title'], response['text']

+ 3 - 6
deal/matchArticle_deal.py

@@ -8,6 +8,7 @@ import time
 import requests
 from uuid import uuid4
 
+from spider.baidu_imgs import get_img_list
 from applications.config import db_config
 from applications.functions import whisper
 from applications.pipeline import question_fission, search_materials, summary_articles, generate_text
@@ -99,10 +100,8 @@ class MatchArticlesTask(object):
 
         async def ai_generate_text(task_tuple, mysql_client):
             task_id, video_title, materials = task_tuple
-            imgs, ai_title, ai_text = summary_articles(materials)
-            print(imgs)
-            print(ai_text)
-            print(ai_text)
+            ai_title, ai_text = summary_articles(materials)
+            imgs = get_img_list(video_title)
             update_sql = f"""
             UPDATE {db_config}
             SET ai_text = '{ai_text}', ai_title = '{ai_title}', img_list = '{json.dumps(imgs, ensure_ascii=False)}',status_code = 3
@@ -226,9 +225,7 @@ class MatchArticlesV2(object):
         FROM {db_config}
         WHERE task_id = '{self.task_id}';
         """
-        print(select_sql)
         result = await self.mysql_client.select(select_sql)
-        print(result)
         video_id, cover, images, ai_text, ai_title, status_code = result[0]
         match status_code:
             case 0:

+ 58 - 0
spider/baidu_imgs.py

@@ -0,0 +1,58 @@
+"""
+@author: luojunhui
+"""
+import requests
+
+
+def tunnel_proxies():
+    """
+    Build the ``proxies`` mapping for the Kuaidaili tunnel proxy.
+
+    The tunnel endpoint and credentials can be overridden through the
+    ``KDL_TUNNEL``, ``KDL_USERNAME`` and ``KDL_PASSWORD`` environment
+    variables. When a variable is unset, the previously hard-coded value is
+    used, so existing deployments keep working unchanged.
+
+    :return: dict with ``http`` and ``https`` keys, both holding the same
+        authenticated proxy URL, in the shape ``requests`` expects.
+    """
+    import os
+    from urllib.parse import quote
+
+    # NOTE(review): the fallback credentials below are committed to the
+    # repository; rotate them and supply real values via the environment.
+    # Tunnel "host:port".
+    tunnel = os.environ.get("KDL_TUNNEL", "q796.kdltps.com:15818")
+    # Username/password authentication.
+    username = os.environ.get("KDL_USERNAME", "t17772369458618")
+    password = os.environ.get("KDL_PASSWORD", "5zqcjkmy")
+    # Percent-encode so characters such as "@" or ":" cannot break the URL.
+    proxy_url = f"http://{quote(username, safe='')}:{quote(password, safe='')}@{tunnel}/"
+    return {"http": proxy_url, "https": proxy_url}
+
+
+def get_img_list(search_title, limit=15, timeout=10):
+    """
+    Fetch image links for a search phrase from the magiconch Baidu-images API.
+
+    :param search_title: text to search images for.
+    :param limit: maximum number of links to return (defaults to 15, the
+        previously hard-coded value).
+    :param timeout: seconds to wait for the HTTP request before giving up.
+    :return: list with the ``ori`` field (presumably the original image
+        URL -- confirm against the API) of the first ``limit`` results;
+        results without a usable ``ori`` value are skipped.
+    :raises requests.RequestException: on network errors, timeouts or a
+        non-2xx HTTP status.
+    """
+    url = "https://lab.magiconch.com/api/baidu/images"
+    params = {
+        "text": search_title,
+        "index": 0,
+        "size": 60
+    }
+    # Browser-like headers captured from a real session. The captured
+    # ``if-none-match`` ETag is deliberately not sent: a conditional request
+    # can be answered with a body-less 304, which would break ``.json()``.
+    headers = {
+        'accept': '*/*',
+        'accept-language': 'en,zh;q=0.9,zh-CN;q=0.8',
+        'content-type': 'application/json',
+        'cookie': 'Hm_lvt_f4e477c61adf5c145ce938a05611d5f0=1718784293; Hm_lpvt_f4e477c61adf5c145ce938a05611d5f0=1718784293',
+        'priority': 'u=1, i',
+        'referer': 'https://lab.magiconch.com/baidu-images/',
+        'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
+        'sec-ch-ua-mobile': '?0',
+        'sec-ch-ua-platform': '"macOS"',
+        'sec-fetch-dest': 'empty',
+        'sec-fetch-mode': 'cors',
+        'sec-fetch-site': 'same-origin',
+        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
+    }
+    # Without a timeout ``requests`` may wait forever on a stalled proxy.
+    response = requests.get(
+        url,
+        headers=headers,
+        params=params,
+        proxies=tunnel_proxies(),
+        timeout=timeout,
+    )
+    # Fail with a clear HTTP error instead of a confusing JSON decode error.
+    response.raise_for_status()
+    results = response.json()
+    return [
+        item["ori"]
+        for item in results[:limit]
+        if isinstance(item, dict) and item.get("ori")
+    ]
+
+
+if __name__ == "__main__":
+    # Manual smoke test. Guarded so that importing ``get_img_list`` from
+    # another module does not fire a proxied network request (and a print)
+    # at import time.
+    img_list = get_img_list("破纪录!中国年龄最大的夫妻,有什么长寿秘诀?")
+    print(img_list)