Prechádzať zdrojové kódy

账号联想--优化

luojunhui pred 8 mesiacmi
rodič
commit
352fdd3173

+ 4 - 3
coldStartTasks/publish/publishCategoryArticles.py

@@ -14,6 +14,7 @@ from config import apolloConfig
 
 apollo = apolloConfig()
 DAILY_CRAWLER_MAX_NUM = 1000
+SIMILARITY_MIN_SCORE = 0.4
 
 
 class CategoryColdStartTask(object):
@@ -101,7 +102,8 @@ class CategoryColdStartTask(object):
             }
         )
         article_df = DataFrame(article_list,
-                               columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status', 'llm_sensitivity', 'score'])
+                               columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status',
+                                        'llm_sensitivity', 'score'])
         return article_df
 
     def filter_each_category(self, category):
@@ -230,7 +232,7 @@ class CategoryColdStartTask(object):
         length_level5 = filter_df.shape[0]
 
         # 第六层通过相关性分数过滤
-        filter_df = filter_df[filter_df['score'] > 0.4]
+        filter_df = filter_df[filter_df['score'] > SIMILARITY_MIN_SCORE]
         length_level6 = filter_df.shape[0]
 
         log(
@@ -396,4 +398,3 @@ class CategoryColdStartTask(object):
                         "traceback": traceback.format_exc()
                     }
                 )
-