Browse Source

冷启动发布,增加品类信息bugfix

luojunhui 5 tháng trước
mục cha
commit
b210a19ec6
2 tập tin đã thay đổi với 10 bổ sung và 10 xóa
  1. 9 9
      account_cold_start_daily.py
  2. 1 1
      cold_start/publish/publishCategoryArticles.py

+ 9 - 9
account_cold_start_daily.py

@@ -11,7 +11,7 @@ from cold_start.crawler.weixinCategoryCrawler import weixinCategory
 from cold_start.publish.publishCategoryArticles import CategoryColdStartTask
 from cold_start.filter.title_similarity_task import ColdStartTitleSimilarityTask
 
-DEFAULT_CATEGORY_LIST = ['1030-手动挑号', 'account_association']
+DEFAULT_CATEGORY_LIST = ['account_association']
 
 
 
@@ -118,8 +118,8 @@ def main(date_str, category_list=None, article_source=None):
     if task.init_db():
         task.publish_article_task(category_list=category_list, article_source=article_source)
 
-        if article_source == 'weixin':
-            task.crawler_task(category_list=category_list, date_str=date_str)
+        # if article_source == 'weixin':
+        #     task.crawler_task(category_list=category_list, date_str=date_str)
 
 
 if __name__ == '__main__':
@@ -132,12 +132,12 @@ if __name__ == '__main__':
     else:
         run_date = datetime.date.today().isoformat()
 
-    # 执行头条发布
-    main(
-        date_str=run_date,
-        category_list=['history', 'tech', 'finance', 'entertainment'],
-        article_source='toutiao'
-    )
+    # # 执行头条发布
+    # main(
+    #     date_str=run_date,
+    #     category_list=['history', 'tech', 'finance', 'entertainment'],
+    #     article_source='toutiao'
+    # )
 
     # 执行微信抓取发布
     main(date_str=run_date)

+ 1 - 1
cold_start/publish/publishCategoryArticles.py

@@ -13,7 +13,7 @@ from applications import aiditApi, log, bot, llm_sensitivity
 from config import apolloConfig
 
 apollo = apolloConfig()
-DAILY_CRAWLER_MAX_NUM = 1000
+DAILY_CRAWLER_MAX_NUM = 9000
 SIMILARITY_MIN_SCORE = 0.4
 TITLE_NOT_SENSITIVE = 0