|  | @@ -82,11 +82,12 @@ class CategoryColdStartTask(object):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          sql = f"""
 | 
	
		
			
				|  |  |          SELECT 
 | 
	
		
			
				|  |  | -            article_id, out_account_id, article_index, title, link, read_cnt, status, llm_sensitivity
 | 
	
		
			
				|  |  | +            article_id, out_account_id, article_index, title, link, read_cnt, status, llm_sensitivity, score
 | 
	
		
			
				|  |  |          FROM
 | 
	
		
			
				|  |  |              crawler_meta_article
 | 
	
		
			
				|  |  |          WHERE 
 | 
	
		
			
				|  |  | -            category = "{category}" and platform = "{article_source}";
 | 
	
		
			
				|  |  | +            category = "{category}" and platform = "{article_source}"
 | 
	
		
			
				|  |  | +        ORDER BY score DESC;
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          article_list = self.db_client.select(sql)
 | 
	
		
			
				|  |  |          log(
 | 
	
	
		
			
				|  | @@ -99,7 +100,7 @@ class CategoryColdStartTask(object):
 | 
	
		
			
				|  |  |              }
 | 
	
		
			
				|  |  |          )
 | 
	
		
			
				|  |  |          article_df = DataFrame(article_list,
 | 
	
		
			
				|  |  | -                               columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status', 'llm_sensitivity'])
 | 
	
		
			
				|  |  | +                               columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status', 'llm_sensitivity', 'score'])
 | 
	
		
			
				|  |  |          return article_df
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def change_article_status(self, category):
 | 
	
	
		
			
				|  | @@ -206,6 +207,11 @@ class CategoryColdStartTask(object):
 | 
	
		
			
				|  |  |              ~(filter_df['llm_sensitivity'] > 0)
 | 
	
		
			
				|  |  |          ]
 | 
	
		
			
				|  |  |          length_level5 = filter_df.shape[0]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        # 第六层通过相关性分数过滤
 | 
	
		
			
				|  |  | +        filter_df = filter_df[filter_df['score'] > 0.4]
 | 
	
		
			
				|  |  | +        length_level6 = filter_df.shape[0]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |          log(
 | 
	
		
			
				|  |  |              task="category_publish_task",
 | 
	
		
			
				|  |  |              function="publish_filter_articles",
 | 
	
	
		
			
				|  | @@ -232,6 +238,9 @@ class CategoryColdStartTask(object):
 | 
	
		
			
				|  |  |                  "通过LLM敏感度过滤": "过滤数量: {}    剩余数量: {}".format(
 | 
	
		
			
				|  |  |                      length_level4 - length_level5, length_level5
 | 
	
		
			
				|  |  |                  ),
 | 
	
		
			
				|  |  | +                "通过相关性分数过滤": "过滤数量: {}    剩余数量: {}".format(
 | 
	
		
			
				|  |  | +                    length_level5 - length_level6, length_level6
 | 
	
		
			
				|  |  | +                ),
 | 
	
		
			
				|  |  |                  "品类": category,
 | 
	
		
			
				|  |  |                  "阅读均值倍数阈值": self.READ_TIMES_THRESHOLD,
 | 
	
		
			
				|  |  |                  "阅读量阈值": self.READ_THRESHOLD,
 | 
	
	
		
			
				|  | @@ -239,7 +248,7 @@ class CategoryColdStartTask(object):
 | 
	
		
			
				|  |  |              },
 | 
	
		
			
				|  |  |              mention=False
 | 
	
		
			
				|  |  |          )
 | 
	
		
			
				|  |  | -        return filter_df
 | 
	
		
			
				|  |  | +        return filter_df[:1000]
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def filter_toutiao_articles(self, articles_df, category):
 | 
	
		
			
				|  |  |          """
 |