瀏覽代碼

account association improve

luojunhui 9 月之前
父節點
當前提交
17405b6857
共有 2 個文件被更改，包括 21 次插入和 3 次删除
  1. 18 0
      applications/const.py
  2. 3 3
      coldStartTasks/crawler/weixin_account_association_crawler.py

+ 18 - 0
applications/const.py

@@ -157,6 +157,24 @@ class UpdateMiniProgramDetailConst(updatePublishedMsgTaskConst):
     """
 
 
+# Account association
+class AccountAssociationTaskConst:
+    """
+    Constant configuration for the account association task.
+    """
+    # Statistics period used when fetching seed titles (presumably seconds, i.e. 7 days -- confirm against caller)
+    STAT_PERIOD = 7 * 24 * 60 * 60
+
+    # Threshold multiple applied to the average read count
+    READ_AVG_MULTIPLE = 1.3
+
+    # Minimum read count
+    MIN_READ_COUNT = 2000
+
+    # Limit on the number of seed titles
+    SEED_TITLE_LIMIT = 100
+
+
 
 
 

+ 3 - 3
coldStartTasks/crawler/weixin_account_association_crawler.py

@@ -15,11 +15,11 @@ from applications import bot
 from applications import log
 from applications import longArticlesMySQL
 from applications import WeixinSpider
-from applications.const import WeixinVideoCrawlerConst
+from applications.const import AccountAssociationTaskConst
 from applications.functions import Functions
 from config import apolloConfig
 
-const = WeixinVideoCrawlerConst()
+const = AccountAssociationTaskConst()
 function = Functions()
 config = apolloConfig()
 
@@ -76,7 +76,7 @@ class AccountAssociationCrawler(object):
                 AND t1.view_count > {const.MIN_READ_COUNT} 
                 AND publish_timestamp > {publish_timestamp_threshold}
             ORDER BY read_rate DESC
-            LIMIT 100;
+            LIMIT {const.SEED_TITLE_LIMIT};
         """
         article_obj_list = self.db_client.select(sql, cursor_type=DictCursor)
         return article_obj_list