luojunhui 1 week ago
parent
commit
62d8885661
1 changed file with 62 additions and 26 deletions
      tasks/publish_tasks/top_article_generalize.py


@@ -1,5 +1,8 @@
+import json
 import time
 import datetime
+import traceback
+
 from tqdm import tqdm
 from typing import List, Dict
 
@@ -127,32 +130,35 @@ class TopArticleGeneralizeFromArticlePool(TopArticleGeneralize):
         publishing_article_list = []
         for title_obj in tqdm(title_obj_list):
             if self.get_title_read_info_detail(title_obj["title"]):
-
-                temp = []
-                keys = self.get_keys_by_ai(title_obj)
-                for key in keys:
-                    candidate_articles = self.get_candidate_articles(key)
-                    temp += candidate_articles
-
-                if temp:
-                    title_list = [i["title"] for i in temp]
-                    # sort by relevance
-                    similarity_array = similarity_between_title_list(
-                        title_list, [title_obj["title"]]
-                    )
-                    response_with_similarity_list = []
-                    for index, item in enumerate(temp):
-                        item["similarity"] = similarity_array[index][0]
-                        response_with_similarity_list.append(item)
-
-                    sorted_response_with_similarity_list = sorted(
-                        response_with_similarity_list,
-                        key=lambda k: k["similarity"],
-                        reverse=True,
-                    )
-                    publishing_article_list += sorted_response_with_similarity_list[
-                        :10
-                    ]
+                try:
+                    temp = []
+                    keys = self.get_keys_by_ai(title_obj)
+                    for key in keys:
+                        candidate_articles = self.get_candidate_articles(key)
+                        temp += candidate_articles
+
+                    if temp:
+                        title_list = [i["title"] for i in temp]
+                        # sort by relevance
+                        similarity_array = similarity_between_title_list(
+                            title_list, [title_obj["title"]]
+                        )
+                        response_with_similarity_list = []
+                        for index, item in enumerate(temp):
+                            item["similarity"] = similarity_array[index][0]
+                            response_with_similarity_list.append(item)
+
+                        sorted_response_with_similarity_list = sorted(
+                            response_with_similarity_list,
+                            key=lambda k: k["similarity"],
+                            reverse=True,
+                        )
+                        publishing_article_list += sorted_response_with_similarity_list[
+                            :10
+                        ]
+                except Exception as e:
+                    print(e)
+                    print(traceback.format_exc())
 
         url_list = [i["link"] for i in publishing_article_list]
         if url_list:
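
Note on the hunk above: the new try/except keeps one failing title from aborting the whole loop, which suits a batch task, but print(e) is redundant next to print(traceback.format_exc()) (the formatted traceback already ends with the exception message), and bare prints are easy to lose in task output. A minimal sketch of the same handler routed through the standard logging module; the logger name and the process() stand-in are placeholders, not code from this repo:

import logging

logging.basicConfig(level=logging.INFO)
# logger name is an assumption, not taken from this repo
logger = logging.getLogger("top_article_generalize")

def process(title_obj):
    # hypothetical stand-in for the per-title body shown in the hunk above
    raise ValueError("demo failure")

for title_obj in [{"title": "example"}]:
    try:
        process(title_obj)
    except Exception:
        # logger.exception() records the message plus the full traceback
        # in one call, replacing the separate print(e) and
        # print(traceback.format_exc())
        logger.exception("failed to process title %r", title_obj["title"])
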
@@ -192,3 +198,33 @@ class TopArticleGeneralizeFromArticlePool(TopArticleGeneralize):
             # change article status
             article_id_list = [i["article_id"] for i in publishing_article_list]
             self.change_article_status_while_publishing(article_id_list=article_id_list)
+
+class TopArticleGeneralizeFromVideoPool(TopArticleGeneralize):
+    def get_candidate_videos(self, key):
+        fetch_query = f"""
+            select article_title, content_trace_id, audit_video_id
+            from publish_single_video_source
+            where status = 0 and bad_status = 0 and article_title like '%{key}%'
+        """
+        fetch_response = self.long_articles_client.fetch(
+            fetch_query, cursor_type=DictCursor
+        )
+        return fetch_response
+
+    def deal(self):
+        title_obj_list = self.fetch_distinct_top_titles()
+        publishing_article_list = []
+        for title_obj in tqdm(title_obj_list):
+            if self.get_title_read_info_detail(title_obj["title"]):
+
+                temp = []
+                keys = self.get_keys_by_ai(title_obj)
+                for key in keys:
+                    candidate_articles = self.get_candidate_videos(key)
+                    temp += candidate_articles
+                print(json.dumps(temp, ensure_ascii=False, indent=4))
+
+#
+# TopArticleGeneralizeFromVideoPool().deal()
+
+
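
A caveat on the new get_candidate_videos: interpolating key into the SQL with an f-string (like '%{key}%') will break the query, or allow injection, if a keyword ever contains a quote character. Whether long_articles_client.fetch supports parameter binding is not visible in this diff, so here is a sketch of the same query written against pymysql directly (matching the DictCursor already in use), with the wildcard pattern bound as a parameter; the function name and connection argument are hypothetical:

import pymysql
from pymysql.cursors import DictCursor

def get_candidate_videos_safe(conn: pymysql.connections.Connection, key: str):
    # Same query as get_candidate_videos, but key is bound as a parameter
    # instead of being formatted into the SQL string. Note that a literal
    # '%' or '_' inside key still acts as a LIKE wildcard unless escaped.
    query = """
        select article_title, content_trace_id, audit_video_id
        from publish_single_video_source
        where status = 0 and bad_status = 0 and article_title like %s
    """
    with conn.cursor(DictCursor) as cursor:
        cursor.execute(query, ("%" + key + "%",))
        return cursor.fetchall()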