Browse Source

修改数据每日冷启动数据

luojunhui 1 month ago
parent
commit
a9975e9ba0
1 changed file with 19 additions and 17 deletions
  1. 19 17
      cold_start/publish/publishCategoryArticles.py

+ 19 - 17
cold_start/publish/publishCategoryArticles.py

@@ -13,8 +13,8 @@ from applications import aiditApi, log, bot, llm_sensitivity
 from config import apolloConfig
 
 apollo = apolloConfig()
-DAILY_CRAWLER_MAX_NUM = 1000
-SIMILARITY_MIN_SCORE = 0.4
+DAILY_CRAWLER_MAX_NUM = 2000
+SIMILARITY_MIN_SCORE = 0.5
 TITLE_NOT_SENSITIVE = 0
 
 
@@ -136,6 +136,7 @@ class CategoryColdStartTask(object):
         过滤单个生成计划类别的文章
         """
         plan_id = self.category_map.get(category)
+        print(category)
         if plan_id:
             article_list = aiditApi.get_generated_article_list(plan_id)
             title_list = [i[1] for i in article_list]
@@ -318,21 +319,21 @@ class CategoryColdStartTask(object):
             case _:
                 return
 
-        success_titles = filtered_articles_df['title'].values.tolist()
-        article_id_list = filtered_articles_df['article_id'].values.tolist()
-        if success_titles:
-            try:
-                sensitive_results = llm_sensitivity.check_titles(success_titles)
-                for article_id, sensitive_result in zip(article_id_list, sensitive_results):
-                    self.update_article_sensitive_status(
-                        article_id=article_id,
-                        status=sensitive_result['hit_rule']
-                    )
-                    if sensitive_result['hit_rule'] > TITLE_NOT_SENSITIVE:
-                        filtered_articles_df = filtered_articles_df[filtered_articles_df['article_id'] != article_id]
-
-            except Exception as e:
-                print("failed to update sensitive status: {}".format(e))
+        # success_titles = filtered_articles_df['title'].values.tolist()
+        # article_id_list = filtered_articles_df['article_id'].values.tolist()
+        # if success_titles:
+        #     try:
+        #         sensitive_results = llm_sensitivity.check_titles(success_titles)
+        #         for article_id, sensitive_result in zip(article_id_list, sensitive_results):
+        #             self.update_article_sensitive_status(
+        #                 article_id=article_id,
+        #                 status=sensitive_result['hit_rule']
+        #             )
+        #             if sensitive_result['hit_rule'] > TITLE_NOT_SENSITIVE:
+        #                 filtered_articles_df = filtered_articles_df[filtered_articles_df['article_id'] != article_id]
+        #
+        #     except Exception as e:
+        #         print("failed to update sensitive status: {}".format(e))
 
         # split into different category
         for ai_category in self.article_category_list:
@@ -424,3 +425,4 @@ class CategoryColdStartTask(object):
                         "traceback": traceback.format_exc()
                     }
                 )
+