@@ -1,5 +1,8 @@
+import json
 import time
 import datetime
+import traceback
+
 from tqdm import tqdm
 from typing import List, Dict
@@ -127,32 +130,35 @@ class TopArticleGeneralizeFromArticlePool(TopArticleGeneralize):
         publishing_article_list = []
         for title_obj in tqdm(title_obj_list):
             if self.get_title_read_info_detail(title_obj["title"]):
-
-                temp = []
-                keys = self.get_keys_by_ai(title_obj)
-                for key in keys:
-                    candidate_articles = self.get_candidate_articles(key)
-                    temp += candidate_articles
-
-                if temp:
-                    title_list = [i["title"] for i in temp]
-                    # sort by relevance
-                    similarity_array = similarity_between_title_list(
-                        title_list, [title_obj["title"]]
-                    )
-                    response_with_similarity_list = []
-                    for index, item in enumerate(temp):
-                        item["similarity"] = similarity_array[index][0]
-                        response_with_similarity_list.append(item)
-
-                    sorted_response_with_similarity_list = sorted(
-                        response_with_similarity_list,
-                        key=lambda k: k["similarity"],
-                        reverse=True,
-                    )
-                    publishing_article_list += sorted_response_with_similarity_list[
-                        :10
-                    ]
+                try:
+                    temp = []
+                    keys = self.get_keys_by_ai(title_obj)
+                    for key in keys:
+                        candidate_articles = self.get_candidate_articles(key)
+                        temp += candidate_articles
+
+                    if temp:
+                        title_list = [i["title"] for i in temp]
+                        # sort by relevance
+                        similarity_array = similarity_between_title_list(
+                            title_list, [title_obj["title"]]
+                        )
+                        response_with_similarity_list = []
+                        for index, item in enumerate(temp):
+                            item["similarity"] = similarity_array[index][0]
+                            response_with_similarity_list.append(item)
+
+                        sorted_response_with_similarity_list = sorted(
+                            response_with_similarity_list,
+                            key=lambda k: k["similarity"],
+                            reverse=True,
+                        )
+                        publishing_article_list += sorted_response_with_similarity_list[
+                            :10
+                        ]
+                except Exception as e:
+                    print(e)
+                    print(traceback.format_exc())

         url_list = [i["link"] for i in publishing_article_list]
         if url_list:
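Editorial note on the hunk above (not part of the patch): the change wraps each title's processing in a try/except so that a failure on one title is logged and the batch moves on, rather than aborting the whole run. The ranking logic itself is untouched; for readability, here is that step factored into a standalone helper. A minimal sketch: rank_candidates and the toy scorer are hypothetical, and it assumes similarity_between_title_list returns a matrix of shape (len(candidate_titles), len(reference_titles)).

def rank_candidates(candidates, reference_title, similarity_fn, top_n=10):
    """Score each candidate dict against reference_title and keep the top_n."""
    titles = [c["title"] for c in candidates]
    scores = similarity_fn(titles, [reference_title])  # assumed shape: (n, 1)
    for i, c in enumerate(candidates):
        c["similarity"] = scores[i][0]
    return sorted(candidates, key=lambda c: c["similarity"], reverse=True)[:top_n]

# Hypothetical usage with a toy scorer standing in for the real model call:
demo = [{"title": "winter health tips"}, {"title": "daily market prices"}]
toy_scorer = lambda titles, refs: [[len(set(t) & set(refs[0]))] for t in titles]
print(rank_candidates(demo, "health tips roundup", toy_scorer, top_n=1))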
@@ -192,3 +198,33 @@ class TopArticleGeneralizeFromArticlePool(TopArticleGeneralize):
         # change article status
         article_id_list = [i["article_id"] for i in publishing_article_list]
         self.change_article_status_while_publishing(article_id_list=article_id_list)
+
+class TopArticleGeneralizeFromVideoPool(TopArticleGeneralize):
+    def get_candidate_videos(self, key):
+        fetch_query = f"""
+            select article_title, content_trace_id, audit_video_id
+            from publish_single_video_source
+            where status = 0 and bad_status = 0 and article_title like '%{key}%'
+        """
+        fetch_response = self.long_articles_client.fetch(
+            fetch_query, cursor_type=DictCursor
+        )
+        return fetch_response
+
+    def deal(self):
+        title_obj_list = self.fetch_distinct_top_titles()
+        publishing_article_list = []
+        for title_obj in tqdm(title_obj_list):
+            if self.get_title_read_info_detail(title_obj["title"]):
+
+                temp = []
+                keys = self.get_keys_by_ai(title_obj)
+                for key in keys:
+                    candidate_articles = self.get_candidate_videos(key)
+                    temp += candidate_articles
+                print(json.dumps(temp, ensure_ascii=False, indent=4))
+
+#
+# TopArticleGeneralizeFromVideoPool().deal()
+
+
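A final editorial note (also not part of the patch): get_candidate_videos interpolates key straight into the SQL with an f-string, so a key containing a quote breaks the query and the method is open to SQL injection. Binding the LIKE pattern as a parameter avoids both problems. The sketch below is illustrative only: it assumes a raw PyMySQL connection, whereas the class actually queries through self.long_articles_client.fetch, whose parameter-binding support is not shown in this diff; get_candidate_videos_safe is a hypothetical name.

from pymysql.cursors import DictCursor

def get_candidate_videos_safe(conn, key):
    """Same query as get_candidate_videos, with the LIKE pattern bound safely."""
    query = """
        select article_title, content_trace_id, audit_video_id
        from publish_single_video_source
        where status = 0 and bad_status = 0 and article_title like %s
    """
    with conn.cursor(DictCursor) as cursor:
        # Build the %...% pattern in Python; the driver escapes it on binding.
        cursor.execute(query, (f"%{key}%",))
        return cursor.fetchall()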