Browse Source

v0.3
readme

罗俊辉 10 months ago
parent
commit
b85bc2dc6a

+ 46 - 1
README.md

@@ -1,3 +1,48 @@
 # VideoToArticles
 # VideoToArticles
+## 1. 安装环境
 
 
-视频匹配文章
+pip install -r requirements.txt
+
+## 2. 启动服务
+hypercorn video_app:app --config config.toml
+
+## 3. 项目文件介绍
+#### applications
+###### ai.py
+    kimi && metaso方法, 可直接调用
+###### async_mysql.py
+    异步mysql连接池
+###### functions.py
+    封装的公共方法
+###### migrate.py
+    将data_works数据存储到数据库中(增量)
+###### mysql.py
+    mysql方法(待优化)
+###### odps_server.py
+    从data_works读取数据的python方法
+###### upload.py
+    自动发布到aigc系统的功能
+#### deal
+###### db_deal.py
+    待优化
+###### publish_deal.py
+    从接口接收信息,将文章发布到aigc平台
+###### video_deal.py
+    从接口获取视频信息
+###### whisper.py
+    调用接口实现whisper功能
+#### routes
+###### vta_routes.py
+    路由代码
+#### spider
+    爬虫代码
+#### temp
+    缓存文件夹
+#### test
+    测试文件夹
+#### video_app.py
+    服务app
+#### config.toml
+    服务config
+#### requirements.txt
+    项目依赖列表

+ 22 - 0
applications/functions.py

@@ -1,6 +1,7 @@
 """
 """
 @author: luojunhui
 @author: luojunhui
 """
 """
+import hashlib
 from datetime import datetime, timedelta
 from datetime import datetime, timedelta
 
 
 import requests
 import requests
@@ -63,3 +64,24 @@ def get_text(video_id):
         headers=header
         headers=header
     )
     )
     return response.json()
     return response.json()
+
+
+def hash_title(title):
+    """
+    hash map
+    :param title:
+    :return:
+    """
+    # 创建md5哈希对象
+    hash_object = hashlib.md5()
+
+    # 对标题进行编码
+    title_bytes = title.encode('utf-8')
+
+    # 更新哈希对象
+    hash_object.update(title_bytes)
+
+    # 获取十六进制形式的哈希值
+    hash_hex = hash_object.hexdigest()
+
+    return hash_hex

+ 2 - 1
applications/upload.py

@@ -106,7 +106,8 @@ def auto_upload_aigc(title, text, img_list):
         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
     }
     }
     response = requests.request("POST", url, headers=headers, data=payload)
     response = requests.request("POST", url, headers=headers, data=payload)
-    print(response.text)
+    # print(response.text)
+    return response.json()
 
 
 
 
 def upload_to_oss(local_path):
 def upload_to_oss(local_path):

+ 1 - 0
deal/__init__.py

@@ -2,4 +2,5 @@
 @author: luojunhui
 @author: luojunhui
 """
 """
 from .videos_deal import RequestDeal
 from .videos_deal import RequestDeal
+from .publish_deal import PublishDeal
 from .db_deal import insert_text_mysql, get_text_by_id
 from .db_deal import insert_text_mysql, get_text_by_id

+ 48 - 2
deal/publish_deal.py

@@ -2,6 +2,7 @@
 @author: luojunhui
 @author: luojunhui
 """
 """
 from applications.upload import *
 from applications.upload import *
+from applications.functions import hash_title
 
 
 
 
 class PublishDeal(object):
 class PublishDeal(object):
@@ -9,5 +10,50 @@ class PublishDeal(object):
     自动发布接口处理
     自动发布接口处理
     """
     """
     def __init__(self, params):
     def __init__(self, params):
-        self.title = params['title']
-        self.text = params['text']
+        self.video_id = None
+        self.img_list = None
+        self.text = None
+        self.title = None
+        self.params = params
+
+    def check_params(self):
+        """
+        check params
+        """
+        try:
+            self.title = self.params['title']
+            self.text = self.params['text']
+            self.img_list = self.params['img_list']
+            self.video_id = self.params['video_id']
+            return None
+        except Exception as e:
+            res = {
+                "error": "params error",
+                "info": "check your param: {}".format(e)
+            }
+            return res
+
+    def deal(self):
+        """
+        处理请求
+        """
+        params_error = self.check_params()
+        if params_error:
+            return params_error
+        else:
+            h_title = hash_title(self.title)
+            img_list = []
+            for index, url in enumerate(self.img_list, 1):
+                save_path = "temp/{}-{}.png".format(h_title, index)
+                local_p = download_image(save_path, url)
+                oss_key = upload_to_oss(local_p)
+                img_obj = {
+                    "fileName": save_path.replace("temp/", ""),
+                    "ossKey": oss_key,
+                    "type": "image/png",
+                    "size": 1234
+                }
+                img_list.append(img_obj)
+            res = auto_upload_aigc(title="{}video_id={}".format(self.title, self.video_id), text=self.text, img_list=img_list)
+            return res
+

+ 4 - 7
routes/vta_routes.py

@@ -1,12 +1,9 @@
 """
 """
 @author: luojunhui
 @author: luojunhui
 """
 """
-import time
-import uuid
-import asyncio
 from quart import Blueprint, jsonify, request
 from quart import Blueprint, jsonify, request
 
 
-from deal import RequestDeal, insert_text_mysql, get_text_by_id
+from deal import RequestDeal, insert_text_mysql, get_text_by_id, PublishDeal
 from applications.functions import whisper
 from applications.functions import whisper
 
 
 
 
@@ -68,9 +65,9 @@ def VTARoutes(mysql_client):
         auto publish article info to aigc system
         auto publish article info to aigc system
         :return:
         :return:
         """
         """
-        res = {
-            "info": "this api is developing"
-        }
+        params = await request.get_json()
+        P = PublishDeal(params=params)
+        res = P.deal()
         return jsonify(res)
         return jsonify(res)
 
 
     return bp
     return bp

+ 36 - 0
test/kimi_dev.py

@@ -0,0 +1,36 @@
+# encoding: utf-8
+"""
+@author: luojunhui
+"""
+import json
+from applications.ai import metaSo, kimi_ai
+
+with open("test_return.json", encoding="utf-8") as f:
+    data = json.loads(f.read())
+
+obj_list = data['data']
+
+for obj in obj_list:
+    video_id = obj['video_id']
+    title = obj['title']
+    video_text = obj['video_text']
+    prompt = f"""
+    我会给你一段文本和标题,需要你帮我判断文本和标题是否相关。
+    如果相关:则通过标题和文本帮我生成一个主题和简要总结
+    如果不相关,则通过标题帮我生成一个主题和简要总结
+    返回的结果是一个json格式,key有两个,分别是theme 和 summary
+    只需要返回json
+    给你的标题是: {title}
+    给你的文本是: {video_text}
+    """
+    response = kimi_ai(prompt)
+    out_path = "temo/{}.json".format(video_id)
+    with open(out_path, "w", encoding="utf-8") as f:
+        f.write(json.dumps(response, ensure_ascii=False, indent=4))
+# meta_prompt = f"""
+# 通过一个关键词列表{res_obj['keys']}
+# 和文本的总结:{res_obj['summary']},
+# 生成一篇2000字左右的文章,来叙述上面的内容
+# """
+# resp = metaSo(prompt=meta_prompt)
+# print(resp)

+ 28 - 0
test/metaso_dev.py

@@ -0,0 +1,28 @@
+"""
+@author: luojunhui
+"""
+import os
+import json
+from applications.ai import metaSo
+
+
+dirs = 'temo'
+file_list = [os.path.join(dirs, file) for file in os.listdir(dirs)]
+for file in file_list:
+    with open(file, encoding="utf-8") as f:
+        data = json.loads(f.read())
+    kimi_obj = json.loads(data) if type(data) == str else data
+    theme = kimi_obj['theme']
+    summary = kimi_obj['summary']
+    prompt = f"""
+    我会给你一个主题和一个总结性文本,需要你通过这两个信息生成一篇文章,
+    如果总结性文本和主题不太相关,则只用主题来生成
+    要注意的点1: 文章的主题要和我提供的一样
+    主题是: {theme}
+    总结性文本是:{summary}
+    """
+    result = metaSo(prompt=prompt)
+    print(json.dumps(result, ensure_ascii=False, indent=4))
+    kimi_obj['text'] = result['data']['msg']
+    with open(file, "w", encoding="utf-8") as f:
+        f.write(json.dumps(kimi_obj, ensure_ascii=False, indent=4))

+ 30 - 0
test/test4.py

@@ -0,0 +1,30 @@
+"""
+@author: luojunhui
+"""
+import json
+
+from spider.toutiao import parse_detail, search_article
+
+with open("test_return.json", encoding="utf-8") as f:
+    video_data = json.loads(f.read())
+
+L = []
+for video_obj in video_data['data']:
+    title = video_obj['title']
+    print(title)
+    urls = search_article(title)
+    if urls:
+        search_list = []
+        for url in urls:
+            try:
+                res_o = parse_detail(url)
+                search_list.append(res_o)
+            except Exception as e:
+                print(e)
+        video_obj['search_list'] = search_list
+        L.append(video_obj)
+    else:
+        continue
+
+with open("search_tt.json", "w", encoding="utf-8") as f:
+    f.write(json.dumps(L, ensure_ascii=False, indent=4))

File diff suppressed because it is too large
+ 12 - 0
test/upload_dev.py


+ 5 - 4
test_req.py → test/videos_dev.py

@@ -9,13 +9,13 @@ import requests
 cate: video_return, video_view, video_rov
 cate: video_return, video_view, video_rov
 """
 """
 
 
-url = "http://localhost:8888/videos"
+url = "http://47.99.132.47:8888/videos"
 
 
 body = {
 body = {
-    "cate": "video_rov",
+    "cate": "video_return",
     "start_date": "2024-05-28",
     "start_date": "2024-05-28",
     "end_date": "2024-05-29",
     "end_date": "2024-05-29",
-    "topN": 10
+    "topN": 20
 }
 }
 a = time.time()
 a = time.time()
 header = {
 header = {
@@ -26,5 +26,6 @@ response = requests.post(url, json=body, headers=header, timeout=600)
 b = time.time()
 b = time.time()
 print(b - a)
 print(b - a)
 print(json.dumps(response.json(), ensure_ascii=False, indent=4))
 print(json.dumps(response.json(), ensure_ascii=False, indent=4))
-with open("test.json", "w", encoding="utf-8") as f:
+
+with open("test_return.json", "w", encoding="utf-8") as f:
     f.write(json.dumps(response.json(), ensure_ascii=False, indent=4))
     f.write(json.dumps(response.json(), ensure_ascii=False, indent=4))

+ 0 - 0
dev.py → test/whisper_dev.py


Some files were not shown because too many files changed in this diff