瀏覽代碼

Update task1: increase select limit since there are many duplicate contents

StrayWarrior 5 月之前
父節點
當前提交
ba86a4215f
共有 1 個文件被更改,包括 6 次插入和 1 次删除
  1. 6 1
      tasks/task1.py

+ 6 - 1
tasks/task1.py

@@ -27,7 +27,7 @@ class MatchTask1(object):
         获取任务
         获取任务
         :return:
         :return:
         """
         """
-        select_limit = spider_coroutines * 5
+        select_limit = spider_coroutines * 100
         select_sql1 = f"""
         select_sql1 = f"""
             SELECT content_id, id
             SELECT content_id, id
             FROM {db_article} 
             FROM {db_article} 
@@ -45,6 +45,11 @@ class MatchTask1(object):
         g_values['row_offset'] = content_ids[-1][1]
         g_values['row_offset'] = content_ids[-1][1]
         print(f"update row offset to: {g_values['row_offset']}")
         print(f"update row offset to: {g_values['row_offset']}")
         unique_content_ids = list(unique_content_ids)[0:spider_coroutines]
         unique_content_ids = list(unique_content_ids)[0:spider_coroutines]
+        logging(
+            code=9001,
+            function="task1.get_task",
+            info=f"unique content ids in batch: {len(unique_content_ids)}"
+        )
         content_ids_tuple = str(unique_content_ids).replace("[", "(").replace("]", ")")
         content_ids_tuple = str(unique_content_ids).replace("[", "(").replace("]", ")")
         if len(content_ids_tuple) > 3:
         if len(content_ids_tuple) > 3:
             select_sql = f"""
             select_sql = f"""