Browse Source

新增中老年娱乐爬虫-sendMessage，修改下载量，修改 user_id

罗俊辉 1 year ago
parent
commit
8b3ac8813e

+ 6 - 2
zhonglaonianyule/zhonglaonianyule_main/run_zlnyljkys_recommend.py

@@ -106,8 +106,12 @@ def main(log_type, crawler, topic_name, group_id, env):
                     env=env,
                 )
                 for i in range(100):
-                    ZL.get_videoList(i + 1)
-                    time.sleep(60)
+                    if ZL.download_count >= int(rule_dict.get("videos_cnt", {}).get("min", 10)):
+                        ZL.download_count = 0
+                        break
+                    else:
+                        ZL.get_videoList(i + 1)
+                        time.sleep(60)
                 Common.logger(log_type, crawler).info("抓取一轮结束\n")
                 Common.logging(log_type, crawler, env, "抓取一轮结束\n")
                 xng_author_end_time = int(time.time())

+ 8 - 2
zhonglaonianyule/zhonglaonianyule_recommend/zhonglaonianyule_recommend_scheduling.py

@@ -7,6 +7,7 @@ import random
 import sys
 import time
 import requests
+from hashlib import md5
 
 from common.mq import MQ
 
@@ -51,6 +52,7 @@ class ZLNYLScheduling:
         self.rule_dict = rule_dict
         self.env = env
         self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
+        self.download_count = 0
 
     def repeat_video(self, video_id):
         sql = f""" select * from crawler_video where platform in ("{self.crawler}","{self.platform}") and out_video_id="{video_id}"; """
@@ -117,7 +119,7 @@ class ZLNYLScheduling:
                         "video_height": 0,
                         "profile_id": 0,
                         "profile_mid": 0,
-                        "cover_url": "",
+                        # "cover_url": "",
                         "session": f"zhonglaonianyule-{int(time.time())}",
                     }
                     for k, v in video_dict.items():
@@ -164,6 +166,8 @@ class ZLNYLScheduling:
                         Common.logger(self.log_type, self.crawler).info("视频已下载\n")
                         Common.logging(self.log_type, self.crawler, self.env, "视频已下载\n")
                     else:
+                        # out_video_id = md5(video_title.encode('utf8')).hexdigest()
+                        # out_user_id = md5(user_name.encode('utf8')).hexdigest()
                         video_dict["out_user_id"] = video_dict["profile_id"]
                         video_dict["platform"] = self.crawler
                         video_dict["strategy"] = self.log_type
@@ -171,11 +175,12 @@ class ZLNYLScheduling:
                         video_dict["width"] = video_dict["video_width"]
                         video_dict["height"] = video_dict["video_height"]
                         video_dict["crawler_rule"] = json.dumps(self.rule_dict)
-                        video_dict["user_id"] = ""
+                        video_dict["user_id"] = "-1"
                         video_dict["publish_time"] = video_dict["publish_time_str"]
                         d_obj = self.find_video_url(video_id)
                         video_dict["video_url"] = d_obj["url"]
                         video_dict["avatar_url"] = d_obj["cover"]
+                        video_dict["cover_url"] = d_obj["cover"]
                         # print(json.dumps(video_dict, ensure_ascii=False, indent=4))
                         self.mq.send_msg(video_dict)
                 except Exception as e:
@@ -213,6 +218,7 @@ class ZLNYLScheduling:
             "{}成功抓取视频链接\n".format(response["data"]["vtitle"]),
         )
         time.sleep(random.randint(3, 5))
+        self.download_count += 1
         return {"url": video_url, "cover": video_cover}