zhangyong 5 miesięcy temu
rodzic
commit
29419da67c

+ 2 - 1
application/common/redis/pyredis.py

@@ -14,7 +14,8 @@ class RedisClient(object):
 
     def __init__(self):
         self.pool = None
-        self.host = 'r-bp1mb0v08fqi4hjffu.redis.rds.aliyuncs.com'
+        # self.host = 'r-bp1mb0v08fqi4hjffu.redis.rds.aliyuncs.com'
+        self.host = "r-bp1mb0v08fqi4hjffupd.redis.rds.aliyuncs.com"  # public-network address; no trailing comma, so host stays a str rather than a 1-tuple
         self.port = 6379
         self.db = 2
         self.password = 'Wqsd@2019'

+ 32 - 24
application/pipeline/pipeline.py

@@ -7,7 +7,7 @@ import time
 sys.path.append(os.getcwd())
 
 from application.common import MysqlHelper, AliyunLogger
-from application.common.redis.pyredis import RedisClient
+# from application.common.redis.pyredis import RedisClient
 
 
 class PiaoQuanPipeline(object):
@@ -25,7 +25,7 @@ class PiaoQuanPipeline(object):
         self.mysql = MysqlHelper(env=env, mode=mode, platform=platform)
         self.aliyun_log = AliyunLogger(platform=platform, mode=mode, env=env)
         self.account = account
-        self.red = RedisClient()
+        # self.red = RedisClient()
 
     def publish_time_flag(self):
         """
@@ -135,6 +135,8 @@ class PiaoQuanPipeline(object):
             return True
         if self.platform == "yuannifuqichangzai" and self.mode == "recommend":
             return True
+        if self.platform == "benshanzhufu" and self.mode == "recommend":
+            return True
         if self.platform == "zuihaodesongni" and self.mode == "recommend":
             return True
         if self.platform == "tiantianjufuqi" and self.mode == "recommend":
@@ -154,6 +156,12 @@ class PiaoQuanPipeline(object):
                 video_time = self.mysql.select(sql=sql_2)[0][0].timestamp()
                 if int(time.time()) - video_time >= 86400 * 4:
                     return True
+            # Xiaoniangao recommend feed and Zhufuquanzi recommend feed: de-duplicate on a 3-day window
+            elif self.platform == "xiaoniangaotuijianliu" or self.platform == "zhufuquanzituijianliu":
+                sql_2 = f"""select create_time from crawler_video where out_video_id="{out_id}";"""
+                video_time = self.mysql.select(sql=sql_2)[0][0].timestamp()
+                if int(time.time()) - video_time >= 86400 * 3:
+                    return True
             self.aliyun_log.logging(
                 code="2002",
                 trace_id=self.trace_id,
@@ -164,26 +172,26 @@ class PiaoQuanPipeline(object):
             return False
         return True
 
-    def mq_exists(self):
-        """
-        检测 mq 是否已经发送过了
-        :return:
-        """
-        if self.red.connect():
-            index_txt = "{}-{}".format(self.platform, self.item['video_id'])
-            index_md5 = hashlib.md5(index_txt.encode()).hexdigest()
-            if self.red.select(index_md5):
-                self.aliyun_log.logging(
-                    code="2007",
-                    trace_id=self.trace_id,
-                    message="该视频 mq 已经发送"
-                )
-                return False
-            else:
-                self.red.insert(index_md5, int(time.time()), 43200)
-                return True
-        else:
-            return True
+    # def mq_exists(self):
+    #     """
+    #     检测 mq 是否已经发送过了
+    #     :return:
+    #     """
+    #     if self.red.connect():
+    #         index_txt = "{}-{}".format(self.platform, self.item['video_id'])
+    #         index_md5 = hashlib.md5(index_txt.encode()).hexdigest()
+    #         if self.red.select(index_md5):
+    #             self.aliyun_log.logging(
+    #                 code="2007",
+    #                 trace_id=self.trace_id,
+    #                 message="该视频 mq 已经发送"
+    #             )
+    #             return False
+    #         else:
+    #             self.red.insert(index_md5, int(time.time()), 43200)
+    #             return True
+    #     else:
+    #         return True
 
     def process_item(self):
         """
@@ -191,8 +199,8 @@ class PiaoQuanPipeline(object):
         :return:
         """
         # Check whether this MQ message has already been sent
-        if not self.mq_exists():
-            return False
+        # if not self.mq_exists():
+        #     return False
         if not self.publish_time_flag():
             # Record the relevant logs
             return False

+ 3 - 3
spider/crawler_online/benshanzhufu.py

@@ -65,7 +65,7 @@ class BSZHRecommend(object):
             headers = {
                 'Content-Type': 'application/json'
             }
-            for i in range(3):
+            for i in range(1, 200):
                 response = requests.request("POST", url, headers=headers, data=payload)
                 response = response.json()
                 if response['code'] != 0:
@@ -79,7 +79,7 @@ class BSZHRecommend(object):
                     message="抓取单条视频失败,请求失败"
                 ),
                 return
-            for index, video_obj in enumerate(response['data']['data'], 1):
+            for index, video_obj in enumerate(response['data']['data'], i):
                 try:
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
@@ -90,7 +90,7 @@ class BSZHRecommend(object):
                     self.aliyun_log.logging(
                         code="3000",
                         message="抓取单条视频失败, 该视频位于第{}页第{}条报错原因是{}".format(
-                            1, index, e
+                            i, index, e
                         ),
                     )
                 if self.limit_flag: