heyudev il y a 2 ans
Parent
commit
4cede42b5f
4 fichiers modifiés avec 46 ajouts et 41 suppressions
  1. 19 16
      crawler/config.py
  2. 2 2
      crawler/context.py
  3. 13 15
      crawler/run.py
  4. 12 8
      crawler/strategy.py

+ 19 - 16
crawler/config.py

@@ -1,23 +1,25 @@
 # 配置信息:环境 策略等
-import os
 
-class BaseConfig:
-    ENV = {
-        'dev': 'dev',  # 开发环境
-        'test': 'test',  # 开发环境
-        'pre': 'pre',  # 开发环境
-        'prod': 'prod'  # 开发环境
-    }
-    PLATFORM = {
-        'xiaoniangao': 'xiaoniangao',  # 小年糕
-        'xigua': 'xigua',  # 西瓜视频
-        'shipinhao': 'shipinhao'  # 视频号
-    }
+from strategy import *
 
+
+class BaseConfig:
     STRATEGY = {
-        'channel_page': 'channel_page',  # 推荐/频道页  规则抓取
-        'home_page': 'home_page',  # 用户主页 规则抓取
-        'search': 'search'  # 搜索结果 规则抓取
+        'xiaoniangao': {
+            'channel_page': XiaoniangaoHomePageStrategy(),  # 推荐/频道页  规则抓取
+            'home_page': XiaoniangaoHomePageStrategy(),  # 用户主页 规则抓取
+            'search': XiaoniangaoHomePageStrategy()  # 搜索结果 规则抓取
+        },  # 小年糕
+        'xigua': {
+            'channel_page': XiguaHomePageStrategy(),  # 推荐/频道页  规则抓取
+            'home_page': XiguaHomePageStrategy(),  # 用户主页 规则抓取
+            'search': XiguaHomePageStrategy()  # 搜索结果 规则抓取
+        },  # 西瓜视频
+        'shipinhao': {
+            'channel_page': ShipinhaoHomePageStrategy(),  # 推荐/频道页  规则抓取
+            'home_page': ShipinhaoHomePageStrategy(),  # 用户主页 规则抓取
+            'search': ShipinhaoHomePageStrategy()  # 搜索结果 规则抓取
+        },  # 视频号
     }
 
     """
@@ -57,6 +59,7 @@ class TestConfig(BaseConfig):
 
     # TODO 发布 接口
 
+
 class ProdConfig(BaseConfig):
     """生产环境配置"""
     ENV_TEXT = "生产环境"

+ 2 - 2
crawler/context.py

@@ -1,4 +1,4 @@
-from crawler.strategy import Strategy
+from strategy import Strategy
 
 
 class Context:
@@ -7,7 +7,7 @@ class Context:
 
     @property
     def strategy(self) -> Strategy:
-        pass
+        return self._strategy
 
     @strategy.setter
     def strategy(self, strategy: Strategy) -> None:

+ 13 - 15
crawler/run.py

@@ -1,27 +1,25 @@
-import logging
 import sys
 
-from crawler import config
-from crawler.context import Context
-from crawler.strategy import XiaoniangaoHomePageStrategy
+import config
+from context import Context
 
 
 class Main:
     @staticmethod
-    def main(argv):
+    def main(**argv):  # keyword arguments read below: platform, strategy, env
         # Platform  Strategy  Env
-        print("start run crawler...")
-        print("Platform = " + argv[0])
-        print("Strategy = " + argv[1])
-        print("Env = " + argv[2])
-        print("Platform = " + config.PLATFORM['xiaoniangao'])
-        print("Strategy = " + config.STRATEGY['home_page'])
-        print("Env = " + config.ENV['test'])
-        # TODO: pick the concrete strategy based on the arguments
-        strategy = XiaoniangaoHomePageStrategy()
+        platform = argv.get('platform')
+        strategy = argv.get('strategy')
+        env = argv.get('env')
+        print(f"start run crawler...{platform}, {strategy}, {env}")
+        # Configure the environment
+        config_ = config.set_config(env)  # set_config is not shown in this diff
+        # Pick the concrete strategy based on the arguments
+        strategy = config_.STRATEGY[platform][strategy]  # rebinds `strategy` from the lookup key to the looked-up value
+        print(strategy)
         context = Context(strategy)
         context.strategy.crawl()
 
 
 if __name__ == '__main__':
-    Main.main(sys.argv)
+    Main.main(**eval(sys.argv[1]))  # NOTE(review): eval() of a command-line argument runs arbitrary code; ast.literal_eval or json.loads would be safer

+ 12 - 8
crawler/strategy.py

@@ -1,28 +1,32 @@
 from abc import ABC, abstractmethod
-from typing import List
 
 
 # Strategy interface
 class Strategy(ABC):
 
     @abstractmethod
-    def crawl(self, data: List):
+    def crawl(self):  # abstract: each concrete strategy below overrides this
         pass
 
+
 # TODO: different platforms, different strategies
 class XiaoniangaoHomePageStrategy(Strategy):
-    def crawl(self, data: List) -> List:
+    def crawl(self):
         # Concrete logic
-        return data
+        print("XiaoniangaoHomePageStrategy")  # prints this strategy's class name
+        return
 
 
 class XiguaHomePageStrategy(Strategy):
-    def crawl(self, data: List) -> List:
+    def crawl(self):
         # Concrete logic
-        return data
+        print("XiguaHomePageStrategy")  # prints this strategy's class name
+        return
 
 
 class ShipinhaoHomePageStrategy(Strategy):
-    def crawl(self, data: List) -> List:
+    def crawl(self):
         # Concrete logic
-        return data
+        print("ShipinhaoHomePageStrategy")  # prints this strategy's class name
+
+        return