|
@@ -7,6 +7,7 @@ import random
|
|
|
import sys
|
|
|
import time
|
|
|
import requests
|
|
|
+from hashlib import md5
|
|
|
|
|
|
from common.mq import MQ
|
|
|
|
|
@@ -51,6 +52,7 @@ class ZLNYLScheduling:
|
|
|
self.rule_dict = rule_dict
|
|
|
self.env = env
|
|
|
self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
|
|
|
+ self.download_count = 0
|
|
|
|
|
|
def repeat_video(self, video_id):
|
|
|
sql = f""" select * from crawler_video where platform in ("{self.crawler}","{self.platform}") and out_video_id="{video_id}"; """
|
|
@@ -117,7 +119,7 @@ class ZLNYLScheduling:
|
|
|
"video_height": 0,
|
|
|
"profile_id": 0,
|
|
|
"profile_mid": 0,
|
|
|
- "cover_url": "",
|
|
|
+ # "cover_url": "",
|
|
|
"session": f"zhonglaonianyule-{int(time.time())}",
|
|
|
}
|
|
|
for k, v in video_dict.items():
|
|
@@ -164,6 +166,8 @@ class ZLNYLScheduling:
|
|
|
Common.logger(self.log_type, self.crawler).info("视频已下载\n")
|
|
|
Common.logging(self.log_type, self.crawler, self.env, "视频已下载\n")
|
|
|
else:
|
|
|
+ # out_video_id = md5(video_title.encode('utf8')).hexdigest()
|
|
|
+ # out_user_id = md5(user_name.encode('utf8')).hexdigest()
|
|
|
video_dict["out_user_id"] = video_dict["profile_id"]
|
|
|
video_dict["platform"] = self.crawler
|
|
|
video_dict["strategy"] = self.log_type
|
|
@@ -171,11 +175,12 @@ class ZLNYLScheduling:
|
|
|
video_dict["width"] = video_dict["video_width"]
|
|
|
video_dict["height"] = video_dict["video_height"]
|
|
|
video_dict["crawler_rule"] = json.dumps(self.rule_dict)
|
|
|
- video_dict["user_id"] = ""
|
|
|
+ video_dict["user_id"] = "-1"
|
|
|
video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
d_obj = self.find_video_url(video_id)
|
|
|
video_dict["video_url"] = d_obj["url"]
|
|
|
video_dict["avatar_url"] = d_obj["cover"]
|
|
|
+ video_dict["cover_url"] = d_obj["cover"]
|
|
|
# print(json.dumps(video_dict, ensure_ascii=False, indent=4))
|
|
|
self.mq.send_msg(video_dict)
|
|
|
except Exception as e:
|
|
@@ -213,6 +218,7 @@ class ZLNYLScheduling:
|
|
|
"{}成功抓取视频链接\n".format(response["data"]["vtitle"]),
|
|
|
)
|
|
|
time.sleep(random.randint(3, 5))
|
|
|
+ self.download_count += 1
|
|
|
return {"url": video_url, "cover": video_cover}
|
|
|
|
|
|
|