| 
					
				 | 
			
			
				@@ -5,14 +5,13 @@ import argparse 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 sys.path.append(os.getcwd()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from application.common.mysql import MysqlHelper 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from application.common import MysqlHelper, AliyunLogger 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from spider.spider_map import spider_map 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 class OnlineManager(object): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     线上爬虫模版 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    Todo: 加上阿里云日志; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     def __init__(self, task_id, mode, platform): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.env = "prod" 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -20,6 +19,7 @@ class OnlineManager(object): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.mode = mode 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.platform = platform 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.MySQL = MysqlHelper(mode=self.mode, platform=self.platform, env=self.env) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.logger = AliyunLogger(platform=self.platform, mode=mode) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     def get_task_rule(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -33,6 +33,11 @@ class OnlineManager(object): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             for item in rule_list: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 for key in item: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     rule_dict[key] = item[key] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.logger.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            code=1000, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            message="抓取规则", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            data=rule_dict 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return rule_dict 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     def get_task_user_list(self): 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -42,25 +47,38 @@ class OnlineManager(object): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         task_user_list_sql = f"SELECT uid, link from crawler_user_v3 where task_id = {self.task_id};" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         uid_list = self.MySQL.select(task_user_list_sql) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         user_list = [{"uid": i[0], "link": i[1]} for i in uid_list] if uid_list else [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.logger.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            code=1000, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            message="用户列表", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            data=user_list 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return user_list 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     def start_crawl(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :return: 爬虫启动脚本 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         rule_dict = self.get_task_rule() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         user_list = self.get_task_user_list() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if rule_dict and user_list: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            spider_class = spider_map[self.platform][self.mode] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            main_process = spider_class( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                platform=self.platform, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                mode=self.mode, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                rule_dict=rule_dict, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                user_list=user_list, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                env=self.env 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            main_process.run() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                spider_class = spider_map[self.platform][self.mode] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.logger.logging(code=1003, message="开始一轮抓取") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                main_process = spider_class( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    platform=self.platform, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    mode=self.mode, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    rule_dict=rule_dict, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    user_list=user_list, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    env=self.env 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                main_process.run() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.logger.logging(code=1004, message="完成一轮抓取") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.logger.logging(code=1006, message="启动爬虫出现错误, 报错原因是: {}".format(e)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 if __name__ == "__main__": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    parser = argparse.ArgumentParser()  # 新建参数解释器对象 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     parser.add_argument("--task_id") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     parser.add_argument("--mode") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     parser.add_argument("--platform") 
			 |