罗俊辉 1 год назад
Родитель
Коммит
b7c50f8e89
1 изменённый файл: 58 добавлений и 42 удаления
  1. 58 42
      shanhuzhufu/shanhuzhufu_recommend/shanhuzhufu_recommend_scheduling.py

+ 58 - 42
shanhuzhufu/shanhuzhufu_recommend/shanhuzhufu_recommend_scheduling.py

@@ -30,17 +30,17 @@ class ShanHuZhuFuRecommend(object):
     def get_video_list(self):
         base_url = "https://shanhu.nnapi.cn/videos/api.videos/getItem"
         headers = {
-            'Host': 'shanhu.nnapi.cn',
-          'xweb_xhr': '1',
-          'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100',
-          'content-type': 'application/json',
-          'accept': '*/*',
-          'sec-fetch-site': 'cross-site',
-          'sec-fetch-mode': 'cors',
-          'sec-fetch-dest': 'empty',
-          'referer': 'https://servicewechat.com/wxc2088c70f666b45e/2/page-frame.html',
-          'accept-language': 'en-US,en;q=0.9',
-          'Cookie': 'PHPSESSID=562dc39e8e68ad3e76c237f687bd049b; lang=zh-cn'
+            "Host": "shanhu.nnapi.cn",
+            "xweb_xhr": "1",
+            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.5(0x13080510)XWEB/1100",
+            "content-type": "application/json",
+            "accept": "*/*",
+            "sec-fetch-site": "cross-site",
+            "sec-fetch-mode": "cors",
+            "sec-fetch-dest": "empty",
+            "referer": "https://servicewechat.com/wxc2088c70f666b45e/2/page-frame.html",
+            "accept-language": "en-US,en;q=0.9",
+            "Cookie": "PHPSESSID=562dc39e8e68ad3e76c237f687bd049b; lang=zh-cn",
         }
         for i in range(100):
             try:
@@ -50,18 +50,22 @@ class ShanHuZhuFuRecommend(object):
                         platform=self.platform,
                         mode=self.mode,
                         env=self.env,
-                        message="本轮已经抓取到足够的数据,自动退出\t{}".format(self.download_cnt)
+                        message="本轮已经抓取到足够的数据,自动退出\t{}".format(self.download_cnt),
                     )
                     return
                 else:
-                    params = {
-                        "mark": "",
-                        "page": i + 1
-                    }
-                    response = requests.get(url=base_url, headers=headers, params=params, proxies=tunnel_proxies())
-                    encrypted_info = response.json()['data']
-                    decrypted_info = json.loads(self.cryptor.decrypt(data=encrypted_info))
-                    video_list = decrypted_info['list']
+                    params = {"mark": "", "page": i + 1}
+                    response = requests.get(
+                        url=base_url,
+                        headers=headers,
+                        params=params,
+                        proxies=tunnel_proxies(),
+                    )
+                    encrypted_info = response.json()["data"]
+                    decrypted_info = json.loads(
+                        self.cryptor.decrypt(data=encrypted_info)
+                    )
+                    video_list = decrypted_info["list"]
                     for index, video_obj in enumerate(video_list, 1):
                         try:
                             AliyunLogger.logging(
@@ -70,7 +74,7 @@ class ShanHuZhuFuRecommend(object):
                                 mode=self.mode,
                                 env=self.env,
                                 message="扫描到一条视频",
-                                data=video_obj
+                                data=video_obj,
                             )
                             self.process_video_obj(video_obj)
                         except Exception as e:
@@ -79,7 +83,8 @@ class ShanHuZhuFuRecommend(object):
                                 platform=self.platform,
                                 mode=self.mode,
                                 env=self.env,
-                                message="抓取第{}条的时候出现问题, 报错信息是{}".format(index, e)
+                                data=video_obj,
+                                message="抓取第{}条的时候出现问题, 报错信息是{}".format(index, e),
                             )
             except Exception as e:
                 AliyunLogger.logging(
@@ -87,26 +92,30 @@ class ShanHuZhuFuRecommend(object):
                     platform=self.platform,
                     mode=self.mode,
                     env=self.env,
-                    message="抓取第{}页时候出现错误, 报错信息是{}".format(i + 1, e)
+                    message="抓取第{}页时候出现错误, 报错信息是{}".format(i + 1, e),
                 )
 
     def process_video_obj(self, video_obj):
         trace_id = self.platform + str(uuid.uuid1())
         our_user = random.choice(self.user_list)
-        publish_time_stamp = datetime.datetime.strptime(video_obj['create_at'], "%Y-%m-%d %H:%M:%S").timestamp()
+        publish_time_stamp = datetime.datetime.strptime(
+            video_obj["create_at"], "%Y-%m-%d %H:%M:%S"
+        ).timestamp()
         item = VideoItem()
-        item.add_video_info("user_id", our_user['uid'])
-        item.add_video_info("user_name", our_user['nick_name'])
-        item.add_video_info("video_id", video_obj['id'])
-        item.add_video_info("video_title", video_obj['name'])
-        item.add_video_info("publish_time_str", video_obj['create_at'])
+        item.add_video_info("user_id", our_user["uid"])
+        item.add_video_info("user_name", our_user["nick_name"])
+        item.add_video_info("video_id", video_obj["id"])
+        item.add_video_info("video_title", video_obj["name"])
+        item.add_video_info("publish_time_str", video_obj["create_at"])
         item.add_video_info("publish_time_stamp", int(publish_time_stamp))
-        item.add_video_info("video_url", video_obj['cover'])
-        item.add_video_info("cover_url", video_obj['cover'] + '&vframe/png/offset/1/w/200')
-        item.add_video_info("like_cnt", video_obj['num_like'])
-        item.add_video_info("play_cnt", video_obj['num_read'])
-        item.add_video_info("comment_cnt", video_obj['num_comment'])
-        item.add_video_info("out_video_id", video_obj['id'])
+        item.add_video_info("video_url", video_obj["cover"])
+        item.add_video_info(
+            "cover_url", video_obj["cover"] + "&vframe/png/offset/1/w/200"
+        )
+        item.add_video_info("like_cnt", video_obj["num_like"])
+        item.add_video_info("play_cnt", video_obj["num_read"])
+        item.add_video_info("comment_cnt", video_obj["num_comment"])
+        item.add_video_info("out_video_id", video_obj["id"])
         item.add_video_info("platform", self.platform)
         item.add_video_info("strategy", self.mode)
         item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
@@ -121,6 +130,7 @@ class ShanHuZhuFuRecommend(object):
         )
         if pipeline.process_item():
             self.download_cnt += 1
+            # print(mq_obj)
             self.mq.send_msg(mq_obj)
             AliyunLogger.logging(
                 code="1002",
@@ -128,14 +138,20 @@ class ShanHuZhuFuRecommend(object):
                 mode=self.mode,
                 env=self.env,
                 message="成功发送至 ETL",
-                data=mq_obj
+                data=mq_obj,
             )
-            if self.download_cnt >= int(self.rule_dict.get("videos_cnt", {}).get("min", 200)):
+            if self.download_cnt >= int(
+                self.rule_dict.get("videos_cnt", {}).get("min", 200)
+            ):
                 self.limit_flag = True
 
 
-
-
-
-
-
+if __name__ == '__main__':
+    S = ShanHuZhuFuRecommend(
+        platform="shanhuzhufu",
+        mode="recommend",
+        env="dev",
+        rule_dict={},
+        user_list=[{'nick_name': "Ivring", 'uid': "1997"}, {'nick_name': "paul", 'uid': "1998"}]
+    )
+    S.get_video_list()