Browse Source

Update task1: increase the select limit, since many selected rows share duplicate content IDs

StrayWarrior 5 months ago
parent
commit
ba86a4215f
1 changed file with 6 additions and 1 deletion
  1. 6 1
      tasks/task1.py

+ 6 - 1
tasks/task1.py

@@ -27,7 +27,7 @@ class MatchTask1(object):
         获取任务
         :return:
         """
-        select_limit = spider_coroutines * 5
+        select_limit = spider_coroutines * 100
         select_sql1 = f"""
             SELECT content_id, id
             FROM {db_article} 
@@ -45,6 +45,11 @@ class MatchTask1(object):
         g_values['row_offset'] = content_ids[-1][1]
         print(f"update row offset to: {g_values['row_offset']}")
         unique_content_ids = list(unique_content_ids)[0:spider_coroutines]
+        logging(
+            code=9001,
+            function="task1.get_task",
+            info=f"unique content ids in batch: {len(unique_content_ids)}"
+        )
         content_ids_tuple = str(unique_content_ids).replace("[", "(").replace("]", ")")
         if len(content_ids_tuple) > 3:
             select_sql = f"""