|
@@ -13,8 +13,8 @@ from applications import aiditApi, log, bot, llm_sensitivity
|
|
|
from config import apolloConfig
|
|
|
|
|
|
apollo = apolloConfig()
|
|
|
-DAILY_CRAWLER_MAX_NUM = 1000
|
|
|
-SIMILARITY_MIN_SCORE = 0.4
|
|
|
+DAILY_CRAWLER_MAX_NUM = 2000
|
|
|
+SIMILARITY_MIN_SCORE = 0.5
|
|
|
TITLE_NOT_SENSITIVE = 0
|
|
|
|
|
|
|
|
@@ -136,6 +136,7 @@ class CategoryColdStartTask(object):
|
|
|
过滤单个生成计划类别的文章
|
|
|
"""
|
|
|
plan_id = self.category_map.get(category)
|
|
|
+ print(category)
|
|
|
if plan_id:
|
|
|
article_list = aiditApi.get_generated_article_list(plan_id)
|
|
|
title_list = [i[1] for i in article_list]
|
|
@@ -318,21 +319,21 @@ class CategoryColdStartTask(object):
|
|
|
case _:
|
|
|
return
|
|
|
|
|
|
- success_titles = filtered_articles_df['title'].values.tolist()
|
|
|
- article_id_list = filtered_articles_df['article_id'].values.tolist()
|
|
|
- if success_titles:
|
|
|
- try:
|
|
|
- sensitive_results = llm_sensitivity.check_titles(success_titles)
|
|
|
- for article_id, sensitive_result in zip(article_id_list, sensitive_results):
|
|
|
- self.update_article_sensitive_status(
|
|
|
- article_id=article_id,
|
|
|
- status=sensitive_result['hit_rule']
|
|
|
- )
|
|
|
- if sensitive_result['hit_rule'] > TITLE_NOT_SENSITIVE:
|
|
|
- filtered_articles_df = filtered_articles_df[filtered_articles_df['article_id'] != article_id]
|
|
|
-
|
|
|
- except Exception as e:
|
|
|
- print("failed to update sensitive status: {}".format(e))
|
|
|
+ # success_titles = filtered_articles_df['title'].values.tolist()
|
|
|
+ # article_id_list = filtered_articles_df['article_id'].values.tolist()
|
|
|
+ # if success_titles:
|
|
|
+ # try:
|
|
|
+ # sensitive_results = llm_sensitivity.check_titles(success_titles)
|
|
|
+ # for article_id, sensitive_result in zip(article_id_list, sensitive_results):
|
|
|
+ # self.update_article_sensitive_status(
|
|
|
+ # article_id=article_id,
|
|
|
+ # status=sensitive_result['hit_rule']
|
|
|
+ # )
|
|
|
+ # if sensitive_result['hit_rule'] > TITLE_NOT_SENSITIVE:
|
|
|
+ # filtered_articles_df = filtered_articles_df[filtered_articles_df['article_id'] != article_id]
|
|
|
+ #
|
|
|
+ # except Exception as e:
|
|
|
+ # print("failed to update sensitive status: {}".format(e))
|
|
|
|
|
|
# split into different category
|
|
|
for ai_category in self.article_category_list:
|
|
@@ -424,3 +425,4 @@ class CategoryColdStartTask(object):
|
|
|
"traceback": traceback.format_exc()
|
|
|
}
|
|
|
)
|
|
|
+
|