|
@@ -25,19 +25,21 @@ class MatchTask1(object):
|
|
|
获取任务
|
|
|
:return:
|
|
|
"""
|
|
|
+ select_limit = spider_coroutines * 5
|
|
|
select_sql1 = f"""
|
|
|
- SELECT DISTINCT (content_id)
|
|
|
+ SELECT content_id
|
|
|
FROM {db_article}
|
|
|
WHERE content_status = 0 and process_times <= 3
|
|
|
ORDER BY request_time_stamp
|
|
|
ASC
|
|
|
- LIMIT {spider_coroutines};
|
|
|
+ LIMIT {select_limit};
|
|
|
"""
|
|
|
content_ids = await self.mysql_client.async_select(select_sql1)
|
|
|
- cil = []
|
|
|
+ unique_content_ids = set()
|
|
|
for content_id in content_ids:
|
|
|
- cil.append(content_id[0])
|
|
|
- content_ids_tuple = str(cil).replace("[", "(").replace("]", ")")
|
|
|
+ unique_content_ids.add(content_id[0])
|
|
|
+ unique_content_ids = list(unique_content_ids)[0:spider_coroutines]
|
|
|
+ content_ids_tuple = str(unique_content_ids).replace("[", "(").replace("]", ")")
|
|
|
if len(content_ids_tuple) > 3:
|
|
|
select_sql = f"""
|
|
|
SELECT trace_id, content_id, gh_id, article_title, article_text, content_status, process_times
|