罗俊辉 1 éve
szülő
commit
a3da69bc6f

+ 4 - 1
applications/config.py

@@ -357,4 +357,7 @@ minigram_info = {
     "avatar_url": "https://rescdn.yishihui.com/0temp/lehuo.png",
     "mid": "wxe8f8f0e23cecad0f",
     "name": "票圈乐活"
-}
+}
+
+# db_config
+db_config = "video_to_articles"

+ 27 - 0
applications/functions.py

@@ -29,6 +29,23 @@ def request_for_info(video_id):
     return response.json()
 
 
+def get_info_lists(vid_list, timeout=10):
+    """
+    Fetch info for a batch of videos with a single HTTP request.
+
+    POSTs ``{"videoIdList": vid_list}`` as a JSON body to the
+    ``batchSelectVideoInfo`` endpoint and returns the decoded JSON response.
+
+    :param vid_list: video ids to look up; must be JSON-serializable
+    :param timeout: seconds to wait on the server before requests raises a
+        timeout error (default 10)
+    :return: the response body decoded from JSON
+    """
+    url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
+    data = {
+        "videoIdList": vid_list
+    }
+    header = {
+        "Content-Type": "application/json",
+    }
+    # requests applies no timeout unless one is passed explicitly, so without
+    # this a stalled connection would block the caller indefinitely.
+    response = requests.post(url, headers=header, data=json.dumps(data), timeout=timeout)
+    return response.json()
+
+
 def generate_daily_strings(start_date, end_date):
     """
     Generate daily date_str
@@ -131,3 +148,13 @@ def create_gzh_path(video_id, shared_uid):
     # 自动把 root_share_id 加入到白名单
     # auto_white(root_share_id)
     return root_share_id, source_id, f"pages/category?jumpPage={urllib.parse.quote(url, safe='')}"
+
+
+def chunks(chunk_list, chunk_size):
+    """
+    Split a sequence into consecutive slices (pagination helper).
+
+    This is a generator: every yielded slice holds ``chunk_size`` items except
+    possibly the last, which holds the remainder. An empty sequence yields
+    nothing.
+
+    :param chunk_list: sliceable sequence that supports ``len()``
+    :param chunk_size: maximum number of items per slice; must be positive
+    :raises ValueError: when iteration starts, if ``chunk_size`` is not positive
+    """
+    # A negative step would make range() empty and silently drop every item,
+    # and a zero step fails with an unrelated-looking range() error; reject
+    # both with one explicit message.
+    if chunk_size <= 0:
+        raise ValueError("chunk_size must be a positive integer")
+    for start in range(0, len(chunk_list), chunk_size):
+        yield chunk_list[start: start + chunk_size]

+ 2 - 1
applications/migrate.py

@@ -26,6 +26,7 @@ def migrate_daily(dt):
     print("{} successfully insert {} rows, totally cost {} seconds".format(dt, len(data), b - a))
 
 
-dt_list = generate_daily_strings("20240613", "20240616")
+dt_list = generate_daily_strings("20240617", "20240617")
 for dt in dt_list:
+    print(dt)
     migrate_daily(dt)

+ 2 - 5
deal/__init__.py

@@ -1,11 +1,8 @@
 """
 @author: luojunhui
 """
-from .videos_deal import RequestDeal
-from .publish_deal import PublishDeal
-from .db_deal import insert_text_mysql, get_text_by_id
-from .articles_deal import ArticleGeneral
+from .videos_deal import VideoDeal
 from .algorithm_deal import ArticleMatchAccount
 from .shareCard_deal import ShareCard
 from .single_video_deal import SingleVideo
-from .matchArticle_deal import MatchArticles
+from .matchArticle_deal import MatchArticlesTask, MatchArticlesV1, MatchArticlesV2

+ 0 - 62
deal/articles_deal.py

@@ -1,62 +0,0 @@
-"""
-@author: luojunhui
-生成文章&&搜索文章
-"""
-from applications.ai import tencent_ai, kimi_ai, metaSo
-
-
-class ArticleGeneral(object):
-    """
-    Generate article with AI
-    """
-
-    def __init__(self, params):
-        self.text = None
-        self.title = None
-        self.aic = None
-        self.params = params
-
-    def check_params(self):
-        """
-        校验params
-        :return:
-        """
-        try:
-            self.aic = self.params['aic']
-            self.title = self.params['title']
-            self.text = self.params['text']
-            return None
-        except AttributeError as err:
-            return {"error": "params error {}".format(err)}
-
-    def deal(self):
-        """
-        处理请求
-        :return:
-        """
-        params_error = self.check_params()
-        if params_error:
-            return params_error
-        else:
-            if self.aic == "tencent":
-                prompt = "请通过这个标题({}), 生成一篇文章".format(self.title)
-                response_text = tencent_ai(
-                    prompt=prompt
-                )
-                return {"text": response_text}
-            elif self.aic == "kimi":
-                if self.text:
-                    prompt = "请通过这个标题({})和这段正文({}), 生成一篇文章".format(self.title, self.text)
-                else:
-                    prompt = "请通过这个标题({}), 生成一篇文章".format(self.title)
-                response_text = kimi_ai(prompt)
-                return {"text": response_text}
-            elif self.aic == "meta":
-                if self.text:
-                    prompt = "请通过这个标题({})和这段正文({}), 生成一篇文章".format(self.title, self.text)
-                else:
-                    prompt = "请通过这个标题({}), 生成一篇文章".format(self.title)
-                response_text = metaSo(prompt)
-                return {"text": response_text}
-            else:
-                return {"error": "errors"}

+ 0 - 38
deal/db_deal.py

@@ -1,38 +0,0 @@
-"""
-@author: luojunhui
-"""
-
-
-async def insert_text_mysql(mysql_client, video_id, text, title):
-    """
-    :return:
-    """
-    select_sql = f"""select video_id from video_text where video_id = {video_id};"""
-    result = await mysql_client.select(select_sql)
-    print(result)
-    if result:
-        return {"duplicated": "vid already exists", "vid": video_id}
-    else:
-        insert_sql = f"""
-        insert into video_text
-        (video_id, video_text, title)
-        values 
-        ({video_id},'{text}', '{title}');
-        """
-        await mysql_client.async_insert(insert_sql)
-        return {"info": "success insert text into mysql", "vid": video_id}
-
-
-async def get_text_by_id(mysql_client, video_id):
-    """
-    获取视频id
-    :param mysql_client:
-    :param video_id:
-    :return:
-    """
-    select_sql = f"""select video_text from video_text where video_id = {video_id}"""
-    result = await mysql_client.select(select_sql)
-    if result:
-        return result[0][0]
-    else:
-        return None

A különbségek nem kerülnek megjelenítésre, a fájl túl nagy
+ 94 - 27
deal/matchArticle_deal.py


+ 0 - 75
deal/publish_deal.py

@@ -1,75 +0,0 @@
-"""
-@author: luojunhui
-"""
-from applications.upload import download_and_upload, auto_upload_aigc
-from applications.functions import hash_title
-
-
-class PublishDeal(object):
-    """
-    自动发布接口处理
-    """
-
-    def __init__(self, params):
-        self.process_list = []
-        self.taskName = None
-        self.params = params
-
-    def check_params(self):
-        """
-        check params
-        """
-        try:
-            self.taskName = self.params['task_name']
-            obj_list = self.params['article_list']
-            self.process_list = [
-                {
-                    "title": obj['title'],
-                    "text": obj['text'],
-                    "img_list": obj['img_list'],
-                    "video_id": obj['video_id'],
-                    "cover": obj['cover']
-                } for obj in obj_list
-            ]
-            return None
-        except Exception as e:
-            res = {
-                "error": "params error",
-                "info": "check your param: {}".format(e)
-            }
-            return res
-
-    def deal(self):
-        """
-        处理请求
-        """
-        params_error = self.check_params()
-        if params_error:
-            return params_error
-        else:
-            L = []
-            for article_obj in self.process_list:
-                # hash title
-                h_title = hash_title(article_obj['title'])
-                # process imgList
-                img_list = []
-                for index, url in enumerate(article_obj['img_list'], 1):
-                    save_path = "temp/{}-{}.png".format(h_title, index)
-                    img_obj = download_and_upload(save_path, url)
-                    img_list.append(img_obj)
-
-                # process Cover
-                if article_obj['cover']:
-                    cover_path = "temp/cover-{}.png".format(h_title)
-                    cover_obj = download_and_upload(cover_path, article_obj['cover'])
-                else:
-                    cover_obj = {}
-                result_obj = {
-                    "title": article_obj['title'] + "video_id={}".format(article_obj['video_id']),
-                    "text": article_obj['text'],
-                    "img_list": img_list,
-                    "cover": cover_obj
-                }
-                L.append(result_obj)
-            res = auto_upload_aigc(task_name=self.taskName, obj_list=L)
-            return res

+ 1 - 0
deal/single_video_deal.py

@@ -44,6 +44,7 @@ class SingleVideo(object):
             # user_id = response['user']['uid']
             # cover = response['coverImg']['coverImgPath']
             result = {
+
                 "videoId": self.video_id,
                 "title": title,
                 "videoUrl": video_url,

+ 22 - 16
deal/videos_deal.py

@@ -1,11 +1,11 @@
 """
 @author: luojunhui
 """
-from applications.functions import get_text, request_for_info
+from applications.functions import chunks, get_info_lists
 from applications.config import minigram_info
 
 
-class RequestDeal(object):
+class VideoDeal(object):
     """
     Deal Request from outside
     """
@@ -23,26 +23,32 @@ class RequestDeal(object):
         :param data_list:
         :return:video_id, title, view_, return_, video_url, rov
         """
-        result_list = [
-            {
-                "video_id": obj[0],
-                "title": obj[1],
-                "view": obj[2],
-                "return": obj[3],
-                "video_url": obj[4],
-                "rov": obj[5],
-                "video_text": get_text(video_id=obj[0])['text'],
-                "cover": request_for_info(video_id=obj[0])['data'][0]['coverImg']['coverImgPath'],
-                "minigram_info": minigram_info
-            } for obj in data_list
-        ]
+        L = []
+        sub_list = chunks(data_list, 20)
+        for temp_list in sub_list:
+            vid_list = [i[0] for i in temp_list]
+            pq_response = get_info_lists(vid_list=vid_list)
+            cover_list = [i['coverImg']['coverImgPath'] for i in pq_response['data']]
+            for index, obj in enumerate(temp_list):
+                temp = {
+                    "video_id": obj[0],
+                    "title": obj[1],
+                    "view": obj[2],
+                    "return": obj[3],
+                    "video_url": obj[4],
+                    "rov": obj[5],
+                    "video_text": "",
+                    "cover": cover_list[index],
+                    "minigram_info": minigram_info
+                }
+                L.append(temp)
         response = {
             "status": "success",
             "cate": self.cate,
             "start_date": self.start_dt,
             "end_dt": self.end_dt,
             "topN": self.topN,
-            "data": result_list
+            "data": L
         }
         return response
 

+ 0 - 3
deal/whisper_deal.py

@@ -1,3 +0,0 @@
-"""
-@author: luojunhui
-"""

+ 2 - 1
requirements.txt

@@ -9,4 +9,5 @@ fake_useragent
 tqdm~=4.66.2
 hypercorn
 openai~=1.21.2
-odps~=3.5.1
+odps~=3.5.1
+tencentcloud-sdk-python

+ 18 - 68
routes/vta_routes.py

@@ -3,15 +3,12 @@
 """
 from quart import Blueprint, jsonify, request
 
-from deal import RequestDeal
+from deal import VideoDeal
 from deal import ArticleMatchAccount
-from deal import ArticleGeneral
-from deal import PublishDeal
-from deal import insert_text_mysql, get_text_by_id
 from deal import ShareCard
 from deal import SingleVideo
-from deal import MatchArticles
-from applications.functions import whisper
+from deal import MatchArticlesV1
+from deal import MatchArticlesV2
 
 
 bp = Blueprint('VideosToArticle', __name__)
@@ -42,63 +39,10 @@ def VTARoutes(mysql_client):
         :return:
         """
         params = await request.get_json()
-        RD = RequestDeal(params, mysql_client)
-        result = await RD.deal()
+        VD = VideoDeal(params, mysql_client)
+        result = await VD.deal()
         return jsonify(result)
 
-    @bp.route('/whisper', methods=["POST"])
-    async def video_extracting():
-        """
-        whisper 处理文本
-        :return:
-        """
-        params = await request.get_json()
-        video_id = params['vid']
-        video_title = params['title']
-        try:
-            response = whisper(video_id)
-            result = await insert_text_mysql(mysql_client, video_id, response['text'], video_title)
-        except Exception as e:
-            result = {"error": str(e), "vid": video_id}
-        return jsonify(result)
-
-    @bp.route('/get_text', methods=["POST"])
-    async def get_video_text():
-        """
-        获取视频文本
-        :return:
-        """
-        params = await request.get_json()
-        video_id = params['vid']
-        text = await get_text_by_id(mysql_client, video_id)
-        if text:
-            result = {"text": text}
-        else:
-            result = {"text": None}
-        return jsonify(result)
-
-    @bp.route('/publish', methods=["POST"])
-    async def auto_publish():
-        """
-        auto publish article info to aigc system
-        :return:
-        """
-        params = await request.get_json()
-        P = PublishDeal(params=params)
-        res = P.deal()
-        return jsonify(res)
-
-    @bp.route('/article', methods=["POST"])
-    async def generate_text():
-        """
-        生成文本
-        :return:
-        """
-        params = await request.get_json()
-        A = ArticleGeneral(params=params)
-        res = A.deal()
-        return jsonify(res)
-
     @bp.route("/match", methods=["POST"])
     async def match_account():
         """
@@ -132,19 +76,25 @@ def VTARoutes(mysql_client):
         response = SV.deal()
         return jsonify(response)
 
-    @bp.route("/matchArticle", methods=["POST"])
+    @bp.route("/matchArticleV1", methods=["POST"])
     async def match_article():
         """
         匹配视频
         :return:
         """
         params = await request.get_json()
-        MA = MatchArticles(params=params)
-        result = MA.deal()
-        response = {
-            "status": "success",
-            "article": result
-        }
+        MA = MatchArticlesV1(params=params, mysql_client=mysql_client)
+        response = await MA.deal()
+        return jsonify(response)
+
+    @bp.route("/matchArticleV2", methods=["POST"])
+    async def recall_article():
+        """
+        Recall Article
+        """
+        params = await request.get_json()
+        MA2 = MatchArticlesV2(params=params, mysql_client=mysql_client)
+        response = await MA2.recall_articles()
         return jsonify(response)
 
     return bp

+ 15 - 0
spider/tencent_news.py

@@ -0,0 +1,15 @@
+"""
+@author: luojunhui
+
+Ad-hoc script: issue one GET request to the new.qq.com search page and print
+the raw response body.
+"""
+import requests
+
+# Search endpoint with a percent-encoded query string.
+url = "https://new.qq.com/search?query=%E5%8C%97%E5%A4%A7%E6%95%99%E6%8E%88%E5%A4%AB%E4%BA%BA96%E5%B2%81%E9%80%9D%E4%B8%96%EF%BC%8C%E7%94%9F%E5%89%8D%E6%9B%BE%E6%96%AD%E9%A3%9F%E6%96%AD%E6%B0%B44%E5%A4%A9%EF%BC%8C%E7%95%99%E4%B8%8B%E4%B8%80%E5%8F%A5%E8%AF%9D%E4%BB%A4%E4%BA%BA%E6%B7%B1%E6%80%9D&page=1"
+
+
+# NOTE(review): the cookie below looks like a captured browser session that is
+# committed to source control -- confirm it is disposable, or load it from
+# configuration instead of hard-coding it.
+headers = {
+    "Content-Type": "application/json",
+    "cookie": "RK=kreEdgt2YJ; ptcz=988b2dee721fc7f396a696a31bcfaca33cdb372f1b881ee5affbce5e5d978e8c; _qimei_uuid42=186031009051009d7cd1945011a64a99cb68d2482e; _qimei_q36=; _qimei_h38=428c111f7cd1945011a64a990300000ca18603; pgv_pvid=2616476048; pgv_pvi=2160320512; pgv_si=s1462014976; pgv_info=ssid=s1768029950; pac_uid=0_ddQwmCn3ZjrMh; _qimei_fingerprint=6326615306fcfb00937ca380512eb6b7; current-city-name=bj; ad_play_index=9; suid=0_ddQwmCn3ZjrMh; lcad_o_minduid=U_QlocTufXCe5zVOsVXp6pMQbmOA_IyY; lcad_appuser=603532C44F72F827; lcad_Lturn=937; lcad_LKBturn=178; lcad_LPVLturn=762; lcad_LPLFturn=93"
+}
+# requests has no default timeout; bound the call so the script cannot hang.
+response = requests.get(url, headers=headers, timeout=10)
+
+print(response.text)

+ 6 - 0
test/google_dev.py

@@ -0,0 +1,6 @@
+"""
+@author: luojunhui
+"""
+
+
+url = "https://www.google.com/search?q=%E9%99%88%E6%80%9D%E5%AF%87"

+ 2 - 1
test/videos_dev.py

@@ -9,13 +9,14 @@ import requests
 cate: video_return, video_view, video_rov
 """
 
+# url = "http://47.99.132.47:8888/videos"
 url = "http://localhost:8888/videos"
 
 body = {
     "cate": "video_return",
     "start_date": "2024-06-05",
     "end_date": "2024-06-06",
-    "topN": 10
+    "topN": 500
 }
 a = time.time()
 header = {

Nem az összes módosított fájl került megjelenítésre, mert túl sok fájl változott