@@ -1,30 +1,205 @@
+# encoding: utf-8
"""
@author: luojunhui
"""
import json
+import requests
+
+from applications.ai import tencent_ai
+
+
+def get_score_list(
+    account_nickname_list,
+    text_list,
+    rate=0.1,
+    min_time=None,
+    max_time=None,
+    interest_type="by_avg",
+    sim_type="mean",
+    keys=("Title", "show_view_count"),
+):
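+    """
+    Request relevance scores for text_list against each account in
+    account_nickname_list from the internal score_list service.
+    Note: the keys parameter is accepted but not forwarded in the payload.
+    """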
+    api_url = 'http://192.168.100.31:8179/score_list'
+    payload = json.dumps({
+        "account_nickname_list": account_nickname_list,
+        "text_list": text_list,
+        "max_time": max_time,
+        "min_time": min_time,
+        "interest_type": interest_type,
+        "sim_type": sim_type,
+        "rate": rate,
+    })
+    # send as JSON, with a generous timeout in line with the other service calls in this module
+    res = requests.post(
+        api_url,
+        headers={"Content-Type": "application/json"},
+        data=payload,
+        timeout=600,
+    ).json()
+    return res
+
+
+def send_to_aigc(task_name, obj):
+    """
+    Submit an article object to the AIGC publish service under the given task name.
+    :param task_name: name of the publish task
+    :param obj: article payload (title, text, video info, etc.)
+    :return: JSON response from the publish service
+    """
+    url = "http://47.99.132.47:8888/publish"
+    body = {
+        "task_name": task_name,
+        "article_list": [obj, obj, obj, obj]
+    }
+    header = {
+        "Content-Type": "application/json"
+    }
+    response = requests.post(url, json=body, headers=header, timeout=500)
+    return response.json()
+
+
+class AutoMatchMain:
+    """
+    Auto-match pipeline: fetch candidate videos, score them against target
+    accounts, transcribe the selected ones, and build article objects.
+    The backing services are hosted on 47.99.132.47.
+    """
+    ip = "47.99.132.47"
+
+    @classmethod
+    def get_video_list(cls, start_dt, end_dt):
+        """
+        Fetch the top candidate videos (cate "video_return") published
+        between start_dt and end_dt.
+        """
+        url = f"http://{cls.ip}:8888/videos"
+        body = {
+            "cate": "video_return",
+            "start_date": start_dt,
+            "end_date": end_dt,
+            "topN": 2000
+        }
+        header = {
+            "Content-Type": "application/json",
+        }
+        response = requests.post(url, json=body, headers=header, timeout=600)
+        # print(json.dumps(response.json(), ensure_ascii=False, indent=4))
+        return response.json()
+
+    @classmethod
+    def match_account(cls, account_list, article_list):
+        """
+        Match candidate articles against target accounts via the /match service.
+        :param account_list: account nicknames to match against
+        :param article_list: article titles / texts to score
+        :return: JSON response from the match service
+        """
+        url = f"http://{cls.ip}:8888/match"
+        body = {
+            "accountList": account_list,
+            "textList": article_list
+        }
+        header = {
+            "Content-Type": "application/json"
+        }
+        response = requests.post(url, json=body, headers=header, timeout=500)
+        print(response.text)
+        return response.json()
+
+    @classmethod
+    def ask_whisper(cls, video_id, title):
+        """
+        Trigger transcription of the given video via the /whisper service.
+        :param title: title associated with the video
+        :param video_id: id of the video to transcribe
+        :return: JSON response from the whisper service
+        """
+        url = f"http://{cls.ip}:8888/whisper"
+        body = {
+            "vid": video_id,
+            "title": title
+        }
+        header = {
+            "Content-Type": "application/json"
+        }
+        response = requests.post(url, json=body, headers=header, timeout=500)
+        return response.json()
+
+    @classmethod
+    def get_text(cls, video_id):
+        """
+        Fetch the transcribed text of a video by its video_id.
+        :param video_id:
+        :return: JSON response containing the text
+        """
+        url = f"http://{cls.ip}:8888/get_text"
+        body = {
+            "vid": video_id
+        }
+        header = {
+            "Content-Type": "application/json"
+        }
+        response = requests.post(url, json=body, headers=header, timeout=500)
+        return response.json()
+
+    @classmethod
+    def process(cls):
+        """
+        Full pipeline: fetch videos, deduplicate them, score their titles against
+        the target accounts, then transcribe the top matches and build article objects.
+        :return: list of article objects
+        """
+        video_list = cls.get_video_list(start_dt="2024-05-01", end_dt="2024-05-30")
+        # deduplicate fetched videos by video_id
+        result = {}
+        for video in video_list['data']:
+            vid = video['video_id']
+            if result.get(vid):
+                continue
+            else:
+                result[vid] = video
+        # map each deduplicated title to its video_id
+        title_dict = {}
+        for i in result:
+            title_dict[result[i]['title']] = i
+        title_list = list(title_dict)
+        account_list = ['生活良读']  # '票圈最新消息', '老友欢聚地'
+        print("start matching accounts")
+        detail_score_obj = get_score_list(account_list, title_list)
+        print("account matching finished")
+        article_list = []
+        for key in detail_score_obj:
+            each_account = detail_score_obj[key]
+            value_list = list(zip(title_list, each_account['score_list'], each_account['text_list_max']))
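+            # keep the five highest-scoring titles for this account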
+            top_5 = sorted(value_list, key=lambda x: x[1], reverse=True)[:5]
+            for item in top_5:
+                ori_title = item[0]
+                generate_title = item[2]
+                video_id = title_dict[ori_title]
+                cls.ask_whisper(video_id=video_id, title=ori_title)
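+                # get_text is assumed to return the transcript produced by the whisper call above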
+                video_text = cls.get_text(video_id=video_id)['text']
+                video_url = result[video_id]['video_url']
+                obj = {
+                    "account": key,
+                    "ori_title": ori_title,
+                    "generate_title": generate_title,
+                    "video_id": video_id,
+                    "video_text": video_text,
+                    "video_url": video_url
+                }
+                article_list.append(obj)
+        return article_list
+
+
+if __name__ == '__main__':
+    AM = AutoMatchMain()
+    target_list = AM.process()
+    for index, i in enumerate(target_list):
+        # print(json.dumps(i, ensure_ascii=False, indent=4))
+        m_text = i['video_text']
+        title = i['ori_title']
+        video_id = i['video_id']
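+        # prompt (kept in Chinese): generate an article of at least 1000 characters from this title and transcript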
+        prompt = f"通过这个标题({title}) 和这些文本({m_text}), 生成一篇1000字以上的文章"
+        ai_text = tencent_ai(prompt=prompt)
+        publish_obj = {
+            "title": title,
+            "video_id": video_id,
+            "img_list": [],
+            "cover": "",
+            "text": ai_text
+        }
+        r = send_to_aigc("test_upload_by_luojunhui---{}".format(index), publish_obj)
+        print(r)
+        print(json.dumps(i, ensure_ascii=False, indent=4))
-from spider.toutiao import parse_detail, search_article
-
-with open("test_return.json", encoding="utf-8") as f:
-    video_data = json.loads(f.read())
-
-L = []
-for video_obj in video_data['data']:
-    title = video_obj['title']
-    print(title)
-    urls = search_article(title)
-    if urls:
-        search_list = []
-        for url in urls:
-            try:
-                res_o = parse_detail(url)
-                search_list.append(res_o)
-            except Exception as e:
-                print(e)
-        video_obj['search_list'] = search_list
-        L.append(video_obj)
-    else:
-        continue
-
-with open("search_tt.json", "w", encoding="utf-8") as f:
-    f.write(json.dumps(L, ensure_ascii=False, indent=4))