hace 5 meses · 7f808d9f13
--- a/account_cold_start_daily.py
+++ b/account_cold_start_daily.py
@@ -8,7 +8,7 @@ from applications import longArticlesMySQL, bot
 
				 from coldStartTasks.crawler.weixinCategoryCrawler import weixinCategory
			
 
				 from coldStartTasks.publish.publishCategoryArticles import CategoryColdStartTask
			
 
				 
			
 
				-DEFAULT_CATEGORY_LIST = ['account_association']
			
 
				+DEFAULT_CATEGORY_LIST = ['1030-手动挑号']
			
 
				 
			
 
				 
			
 
				 class AccountColdStartDailyTask(object):
			
@@ -109,7 +109,7 @@ def main(category_list=None, article_source=None):
 
				         if article_source == 'weixin':
			
 
				             task.crawler_task(category_list=category_list)
			
 
				 
			
 
				-        # task.publish_task(category_list=category_list, article_source=article_source)
			
 
				+        task.publish_task(category_list=category_list, article_source=article_source)
			
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
--- a/coldStartTasks/publish/publishCategoryArticles.py
+++ b/coldStartTasks/publish/publishCategoryArticles.py
@@ -82,11 +82,12 @@ class CategoryColdStartTask(object):
 
				         """
			
 
				         sql = f"""
			
 
				         SELECT 
			
 
				-            article_id, out_account_id, article_index, title, link, read_cnt, status, llm_sensitivity
			
 
				+            article_id, out_account_id, article_index, title, link, read_cnt, status, llm_sensitivity, score
			
 
				         FROM
			
 
				             crawler_meta_article
			
 
				         WHERE 
			
 
				-            category = "{category}" and platform = "{article_source}";
			
 
				+            category = "{category}" and platform = "{article_source}"
			
 
				+        ORDER BY score DESC;
			
 
				         """
			
 
				         article_list = self.db_client.select(sql)
			
 
				         log(
			
@@ -99,7 +100,7 @@ class CategoryColdStartTask(object):
 
				             }
			
 
				         )
			
 
				         article_df = DataFrame(article_list,
			
 
				-                               columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status', 'llm_sensitivity'])
			
 
				+                               columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status', 'llm_sensitivity', 'score'])
			
 
				         return article_df
			
 
				 
			
 
				     def change_article_status(self, category):
			
@@ -206,6 +207,11 @@ class CategoryColdStartTask(object):
 
				             ~(filter_df['llm_sensitivity'] > 0)
			
 
				         ]
			
 
				         length_level5 = filter_df.shape[0]
			
 
				+
			
 
				+        # 第六层通过相关性分数过滤
			
 
				+        filter_df = filter_df[filter_df['score'] > 0.4]
			
 
				+        length_level6 = filter_df.shape[0]
			
 
				+
			
 
				         log(
			
 
				             task="category_publish_task",
			
 
				             function="publish_filter_articles",
			
@@ -232,6 +238,9 @@ class CategoryColdStartTask(object):
 
				                 "通过LLM敏感度过滤": "过滤数量: {}    剩余数量: {}".format(
			
 
				                     length_level4 - length_level5, length_level5
			
 
				                 ),
			
 
				+                "通过相关性分数过滤": "过滤数量: {}    剩余数量: {}".format(
			
 
				+                    length_level5 - length_level6, length_level6
			
 
				+                ),
			
 
				                 "品类": category,
			
 
				                 "阅读均值倍数阈值": self.READ_TIMES_THRESHOLD,
			
 
				                 "阅读量阈值": self.READ_THRESHOLD,
			
@@ -239,7 +248,7 @@ class CategoryColdStartTask(object):
 
				             },
			
 
				             mention=False
			
 
				         )
			
 
				-        return filter_df
			
 
				+        return filter_df[:1000]
			
 
				 
			
 
				     def filter_toutiao_articles(self, articles_df, category):
			
 
				         """