| 
					
				 | 
			
			
				@@ -9,7 +9,7 @@ from applications.functions.log import logging 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from static.config import spider_coroutines 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 # Temporary solution for task dead-lock 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-g_values = {'row_offset': 0} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+g_values = {'row_offset': 0, 'skip_num': 0} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 class MatchTask1(object): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """ 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -41,16 +41,33 @@ class MatchTask1(object): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         for content_id in content_ids: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             unique_content_ids.add(content_id[0]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if not unique_content_ids: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if g_values['skip_num'] > 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                logging(code=9001, function="task1.get_task", info="reset row offset to 0") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                g_values['row_offset'] = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                g_values['skip_num'] = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             return [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         g_values['row_offset'] = content_ids[-1][1] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print(f"update row offset to: {g_values['row_offset']}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        unique_content_ids = list(unique_content_ids)[0:spider_coroutines] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             code=9001, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             function="task1.get_task", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            info=f"unique content ids in batch: {len(unique_content_ids)}" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            info=f"unique content ids: {len(unique_content_ids)}" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        content_ids_tuple = str(unique_content_ids).replace("[", "(").replace("]", ")") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        content_ids_to_process = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for content_id in unique_content_ids: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            history_videos = await self.get_history_videos(content_id) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if not history_videos: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                content_ids_to_process.append(content_id) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if spider_coroutines > len(content_ids_to_process): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            logging(code=9001, function="task1.get_task", info="some content is skipped, process it later") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            g_values['skip_num'] = 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        content_ids_to_process = content_ids_to_process[0:spider_coroutines] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            code=9001, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            function="task1.get_task", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            info=f"content ids to process: {len(content_ids_to_process)}" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        content_ids_tuple = str(content_ids_to_process).replace("[", "(").replace("]", ")") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if len(content_ids_tuple) > 3: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             select_sql = f""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 SELECT trace_id, content_id, gh_id, article_title, article_text, content_status, process_times 
			 |