Browse Source

v0.7
修改判断是否存在的逻辑

罗俊辉 10 months ago
parent
commit
f920061c7b
1 changed file with 200 additions and 25 deletions
  1. 200 25
      test/test4.py

+ 200 - 25
test/test4.py

@@ -1,30 +1,205 @@
+# encoding: utf-8
 """
 @author: luojunhui
 """
 import json
+import requests
+
+from applications.ai import tencent_ai
+
+
+def get_score_list(
+        account_nickname_list,
+        text_list,
+        rate=0.1,
+        min_time=None,
+        max_time=None,
+        interest_type="by_avg",
+        sim_type="mean",
+        keys=None,
+        timeout=600,
+):
+    """
+    POST the accounts and candidate texts to the score_list service and return its reply.
+
+    :param account_nickname_list: sent as "account_nickname_list"
+    :param text_list: sent as "text_list"
+    :param rate: sent as "rate"
+    :param min_time: sent as "min_time"
+    :param max_time: sent as "max_time"
+    :param interest_type: sent as "interest_type"
+    :param sim_type: sent as "sim_type"
+    :param keys: accepted for backward compatibility only; it is not part of the
+        request. Defaults to None instead of a shared mutable list.
+    :param timeout: seconds to wait for the service before requests raises a
+        timeout error, so a stalled server cannot block the caller forever
+    :return: the decoded JSON body of the response
+    """
+    api_url = 'http://192.168.100.31:8179/score_list'
+    # The body is a pre-serialized JSON string sent with empty headers, exactly
+    # as before, so the server sees the same request format.
+    payload = json.dumps({
+        "account_nickname_list": account_nickname_list,
+        "text_list": text_list,
+        "max_time": max_time,
+        "min_time": min_time,
+        "interest_type": interest_type,
+        "sim_type": sim_type,
+        "rate": rate,
+    })
+    response = requests.post(api_url, headers={}, data=payload, timeout=timeout)
+    return response.json()
+
+
+def send_to_aigc(task_name, obj):
+    """
+    Publish an article object to the /publish endpoint under a task name.
+
+    :param task_name: value sent as "task_name"
+    :param obj: article payload; "article_list" carries it four times
+    :return: the decoded JSON body of the response
+    """
+    publish_url = "http://47.99.132.47:8888/publish"
+    json_headers = {"Content-Type": "application/json"}
+    payload = {"task_name": task_name, "article_list": [obj] * 4}
+    reply = requests.post(publish_url, json=payload, headers=json_headers, timeout=500)
+    return reply.json()
+
+
+class AutoMatchMain:
+    """
+    Client for the HTTP service on port 8888 of ``ip``, plus the pipeline
+    (``process``) that chains its endpoints together.
+    """
+    # Host serving the /videos, /match, /whisper and /get_text endpoints.
+    ip = "47.99.132.47"
+
+    @classmethod
+    def _post(cls, path, body, timeout=500):
+        """
+        POST ``body`` as JSON to ``path`` on the service host.
+
+        :param path: endpoint name without the leading slash
+        :param body: JSON-serializable request body
+        :param timeout: seconds before requests gives up on the call
+        :return: the raw ``requests.Response``
+        """
+        url = f"http://{cls.ip}:8888/{path}"
+        header = {
+            "Content-Type": "application/json"
+        }
+        return requests.post(url, json=body, headers=header, timeout=timeout)
+
+    @classmethod
+    def get_video_list(cls, start_dt, end_dt):
+        """
+        Fetch the video list from /videos.
+
+        :param start_dt: sent as "start_date"
+        :param end_dt: sent as "end_date"
+        :return: the decoded JSON body of the response
+        """
+        body = {
+            "cate": "video_return",
+            "start_date": start_dt,
+            "end_date": end_dt,
+            "topN": 2000
+        }
+        return cls._post("videos", body, timeout=600).json()
+
+    @classmethod
+    def match_account(cls, account_list, article_list):
+        """
+        Send accounts and texts to /match.
+
+        :param account_list: sent as "accountList"
+        :param article_list: sent as "textList"
+        :return: the decoded JSON body of the response
+        """
+        body = {
+            "accountList": account_list,
+            "textList": article_list
+        }
+        response = cls._post("match", body)
+        # The raw response text is echoed to stdout before decoding.
+        print(response.text)
+        return response.json()
+
+    @classmethod
+    def ask_whisper(cls, video_id, title):
+        """
+        Call /whisper for one video.
+
+        :param video_id: sent as "vid"
+        :param title: sent as "title"
+        :return: the decoded JSON body of the response
+        """
+        body = {
+            "vid": video_id,
+            "title": title
+        }
+        return cls._post("whisper", body).json()
+
+    @classmethod
+    def get_text(cls, video_id):
+        """
+        Fetch the text of a video from /get_text.
+
+        :param video_id: sent as "vid"
+        :return: the decoded JSON body of the response
+        """
+        body = {
+            "vid": video_id
+        }
+        return cls._post("get_text", body).json()
+
+    @classmethod
+    def process(cls, start_dt="2024-05-01", end_dt="2024-05-30", account_list=None, top_n=5):
+        """
+        Score video titles against accounts and collect the best matches.
+
+        Steps: fetch videos for the date range, keep the first entry per
+        video_id, score every title with ``get_score_list``, then for each
+        account take the ``top_n`` highest-scoring titles and gather their text.
+
+        :param start_dt: start date passed to ``get_video_list``
+        :param end_dt: end date passed to ``get_video_list``
+        :param account_list: account nicknames to score against; defaults to
+            ['生活良读']
+        :param top_n: number of highest-scoring titles kept per account
+        :return: list of dicts with the keys "account", "ori_title",
+            "generate_title", "video_id", "video_text" and "video_url"
+        """
+        if account_list is None:
+            # Other nicknames noted in the original source: '票圈最新消息', '老友欢聚地'
+            account_list = ['生活良读']
+        video_list = cls.get_video_list(start_dt=start_dt, end_dt=end_dt)
+        # De-duplicate: the first video seen for each video_id wins.
+        result = {}
+        for video in video_list['data']:
+            result.setdefault(video['video_id'], video)
+        # Map title -> video_id; when two videos share a title the later one wins.
+        title_dict = {video['title']: vid for vid, video in result.items()}
+        title_list = list(title_dict)
+        print("开始匹配账号")
+        detail_score_obj = get_score_list(account_list, title_list)
+        print("账号匹配完成")
+        matched = []
+        for account, detail in detail_score_obj.items():
+            # Each row is (title, score, text_list_max entry); ranked by score.
+            rows = zip(title_list, detail['score_list'], detail['text_list_max'])
+            best = sorted(rows, key=lambda row: row[1], reverse=True)[:top_n]
+            for ori_title, _score, generate_title in best:
+                video_id = title_dict[ori_title]
+                # /whisper is called before /get_text; its reply is not used here.
+                cls.ask_whisper(video_id=video_id, title=ori_title)
+                video_text = cls.get_text(video_id=video_id)['text']
+                video_url = result[video_id]['video_url']
+                matched.append({
+                    "account": account,
+                    "ori_title": ori_title,
+                    "generate_title": generate_title,
+                    "video_id": video_id,
+                    "video_text": video_text,
+                    "video_url": video_url
+                })
+        return matched
+
+
+if __name__ == '__main__':
+    # Run the matching pipeline, then have the AI write an article for every
+    # matched video and publish it.
+    matcher = AutoMatchMain()
+    for seq, matched in enumerate(matcher.process()):
+        transcript = matched['video_text']
+        source_title = matched['ori_title']
+        prompt = f"通过这个标题({source_title}) 和这些文本({transcript}), 生成一篇1000字以上的文章"
+        article = {
+            "title": source_title,
+            "video_id": matched['video_id'],
+            "img_list": [],
+            "cover": "",
+            "text": tencent_ai(prompt=prompt)
+        }
+        # One publish task per matched video, numbered by its position.
+        task_name = "test_upload_by_luojunhui---{}".format(seq)
+        publish_reply = send_to_aigc(task_name, article)
+        print(publish_reply)
+        print(json.dumps(matched, ensure_ascii=False, indent=4))
 
-from spider.toutiao import parse_detail, search_article
-
-with open("test_return.json", encoding="utf-8") as f:
-    video_data = json.loads(f.read())
-
-L = []
-for video_obj in video_data['data']:
-    title = video_obj['title']
-    print(title)
-    urls = search_article(title)
-    if urls:
-        search_list = []
-        for url in urls:
-            try:
-                res_o = parse_detail(url)
-                search_list.append(res_o)
-            except Exception as e:
-                print(e)
-        video_obj['search_list'] = search_list
-        L.append(video_obj)
-    else:
-        continue
-
-with open("search_tt.json", "w", encoding="utf-8") as f:
-    f.write(json.dumps(L, ensure_ascii=False, indent=4))