| 
														
															@@ -9,7 +9,7 @@ from applications.functions.log import logging 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from static.config import spider_coroutines 
														 | 
														
														 | 
														
															 from static.config import spider_coroutines 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 # Temporary solution for task dead-lock 
														 | 
														
														 | 
														
															 # Temporary solution for task dead-lock 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-g_values = {'row_offset': 0} 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+g_values = {'row_offset': 0, 'skip_num': 0} 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 class MatchTask1(object): 
														 | 
														
														 | 
														
															 class MatchTask1(object): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     """ 
														 | 
														
														 | 
														
															     """ 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -41,16 +41,33 @@ class MatchTask1(object): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         for content_id in content_ids: 
														 | 
														
														 | 
														
															         for content_id in content_ids: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             unique_content_ids.add(content_id[0]) 
														 | 
														
														 | 
														
															             unique_content_ids.add(content_id[0]) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         if not unique_content_ids: 
														 | 
														
														 | 
														
															         if not unique_content_ids: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            if g_values['skip_num'] > 0: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                logging(code=9001, function="task1.get_task", info="reset row offset to 0") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                g_values['row_offset'] = 0 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                g_values['skip_num'] = 0 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             return [] 
														 | 
														
														 | 
														
															             return [] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         g_values['row_offset'] = content_ids[-1][1] 
														 | 
														
														 | 
														
															         g_values['row_offset'] = content_ids[-1][1] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         print(f"update row offset to: {g_values['row_offset']}") 
														 | 
														
														 | 
														
															         print(f"update row offset to: {g_values['row_offset']}") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-        unique_content_ids = list(unique_content_ids)[0:spider_coroutines] 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         logging( 
														 | 
														
														 | 
														
															         logging( 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             code=9001, 
														 | 
														
														 | 
														
															             code=9001, 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             function="task1.get_task", 
														 | 
														
														 | 
														
															             function="task1.get_task", 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-            info=f"unique content ids in batch: {len(unique_content_ids)}" 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            info=f"unique content ids: {len(unique_content_ids)}" 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         ) 
														 | 
														
														 | 
														
															         ) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-        content_ids_tuple = str(unique_content_ids).replace("[", "(").replace("]", ")") 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        content_ids_to_process = [] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        for content_id in unique_content_ids: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            history_videos = await self.get_history_videos(content_id) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            if not history_videos: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                content_ids_to_process.append(content_id) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        if spider_coroutines > len(content_ids_to_process): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            logging(code=9001, function="task1.get_task", info="some content is skipped, process it later") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            g_values['skip_num'] = 1 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        content_ids_to_process = content_ids_to_process[0:spider_coroutines] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        logging( 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            code=9001, 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            function="task1.get_task", 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            info=f"content ids to process: {len(content_ids_to_process)}" 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        ) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        content_ids_tuple = str(content_ids_to_process).replace("[", "(").replace("]", ")") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         if len(content_ids_tuple) > 3: 
														 | 
														
														 | 
														
															         if len(content_ids_tuple) > 3: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             select_sql = f""" 
														 | 
														
														 | 
														
															             select_sql = f""" 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                 SELECT trace_id, content_id, gh_id, article_title, article_text, content_status, process_times 
														 | 
														
														 | 
														
															                 SELECT trace_id, content_id, gh_id, article_title, article_text, content_status, process_times 
														 |