|
@@ -210,181 +210,159 @@ class ShiPinHaoAuthor(object):
|
|
|
# 获取当前时间
|
|
|
current_time = datetime.now()
|
|
|
formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
+ video_dict = {
|
|
|
+ "video_id": objectId,
|
|
|
+ "video_title": clean_title(video_obj.get('title').split("\n")[0].split("#")[0]),
|
|
|
+ "out_video_id": objectId,
|
|
|
+ "publish_time_stamp": publish_time_stamp,
|
|
|
+ "publish_time_str": publish_time_str,
|
|
|
+ "play_cnt": 0,
|
|
|
+ "fav_count": int(obj['fav_count']),
|
|
|
+ "comment_cnt": int(obj['comment_count']),
|
|
|
+ "like_cnt": like_cnt,
|
|
|
+ "share_cnt": share_cnt,
|
|
|
+ "user_id": self.user_dict["uid"],
|
|
|
+ "cover_url": video_obj.get('thumb_url'),
|
|
|
+ "video_url": video_url,
|
|
|
+ "avatar_url": video_obj.get('thumb_url'),
|
|
|
+ "width": video_obj.get('width'),
|
|
|
+ "height": video_obj.get('height'),
|
|
|
+ "duration": duration,
|
|
|
+ "platform": self.platform,
|
|
|
+ "strategy": self.mode,
|
|
|
+ "crawler_rule": self.rule_dict,
|
|
|
+ "session": f"shipinhao-author-{int(time.time())}",
|
|
|
+ }
|
|
|
if share_cnt == 0:
|
|
|
divisor_cnt = 0
|
|
|
else:
|
|
|
divisor_cnt = int(share_cnt / like_cnt)
|
|
|
- values = [[
|
|
|
- obj['nickname'],
|
|
|
- publish_time_str,
|
|
|
- formatted_time,
|
|
|
- int(obj['fav_count']),
|
|
|
- int(obj['comment_count']),
|
|
|
- int(obj['like_count']),
|
|
|
- int(obj['forward_count']),
|
|
|
- divisor_cnt,
|
|
|
- video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
- duration,
|
|
|
- '',
|
|
|
- '',
|
|
|
- video_obj.get('DownloadAddress')
|
|
|
- ]]
|
|
|
- Feishu.insert_columns(self.platform, 'shipinhao', "0ipHnR", "ROWS", 1, 2)
|
|
|
- time.sleep(0.5)
|
|
|
- Feishu.update_values(self.platform, 'shipinhao', "0ipHnR", "A2:Z2", values)
|
|
|
- # video_dict = {
|
|
|
- # "video_id": objectId,
|
|
|
- # "video_title": clean_title(video_obj.get('title').split("\n")[0].split("#")[0]),
|
|
|
- # "out_video_id": objectId,
|
|
|
- # "publish_time_stamp": publish_time_stamp,
|
|
|
- # "publish_time_str": publish_time_str,
|
|
|
- # "play_cnt": 0,
|
|
|
- # "fav_count": int(obj['fav_count']),
|
|
|
- # "comment_cnt": int(obj['comment_count']),
|
|
|
- # "like_cnt": like_cnt,
|
|
|
- # "share_cnt": share_cnt,
|
|
|
- # "user_id": self.user_dict["uid"],
|
|
|
- # "cover_url": video_obj.get('thumb_url'),
|
|
|
- # "video_url": video_url,
|
|
|
- # "avatar_url": video_obj.get('thumb_url'),
|
|
|
- # "width": video_obj.get('width'),
|
|
|
- # "height": video_obj.get('height'),
|
|
|
- # "duration": duration,
|
|
|
- # "platform": self.platform,
|
|
|
- # "strategy": self.mode,
|
|
|
- # "crawler_rule": self.rule_dict,
|
|
|
- # "session": f"shipinhao-author-{int(time.time())}",
|
|
|
- # }
|
|
|
- # if share_cnt == 0:
|
|
|
- # divisor_cnt = 0
|
|
|
- # else:
|
|
|
- # divisor_cnt = int(share_cnt / like_cnt)
|
|
|
- # # 视频时长小于30秒 返回
|
|
|
- # if duration < 20:
|
|
|
- # values = [[
|
|
|
- # obj['nickname'],
|
|
|
- # publish_time_str,
|
|
|
- # formatted_time,
|
|
|
- # int(obj['fav_count']),
|
|
|
- # int(obj['comment_count']),
|
|
|
- # int(obj['like_count']),
|
|
|
- # int(obj['forward_count']),
|
|
|
- # divisor_cnt,
|
|
|
- # video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
- # duration,
|
|
|
- # '否',
|
|
|
- # '时长小于30秒',
|
|
|
- # video_obj.get('DownloadAddress')
|
|
|
- # ]]
|
|
|
- # Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
- # time.sleep(0.5)
|
|
|
- # Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
- # return True
|
|
|
- # # 分享小于1000 返回
|
|
|
- # if share_cnt < 1000:
|
|
|
- # values = [[
|
|
|
- # obj['nickname'],
|
|
|
- # publish_time_str,
|
|
|
- # formatted_time,
|
|
|
- # int(obj['fav_count']),
|
|
|
- # int(obj['comment_count']),
|
|
|
- # int(obj['like_count']),
|
|
|
- # int(obj['forward_count']),
|
|
|
- # divisor_cnt,
|
|
|
- # video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
- # duration,
|
|
|
- # '否',
|
|
|
- # '分享小于1000',
|
|
|
- # video_obj.get('DownloadAddress')
|
|
|
- # ]]
|
|
|
- # Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
- # time.sleep(0.5)
|
|
|
- # Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
- # return True
|
|
|
- # # 分享小于等于99999
|
|
|
- # if share_cnt <= 99999 and divisor_cnt < 2:
|
|
|
- # values = [[
|
|
|
- # obj['nickname'],
|
|
|
- # publish_time_str,
|
|
|
- # formatted_time,
|
|
|
- # int(obj['fav_count']),
|
|
|
- # int(obj['comment_count']),
|
|
|
- # int(obj['like_count']),
|
|
|
- # int(obj['forward_count']),
|
|
|
- # divisor_cnt,
|
|
|
- # video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
- # duration,
|
|
|
- # '否',
|
|
|
- # f'分享小于100000,分享/点赞:{divisor_cnt}',
|
|
|
- # video_obj.get('DownloadAddress')
|
|
|
- # ]]
|
|
|
- # Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
- # time.sleep(0.5)
|
|
|
- # Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
- # return True
|
|
|
- # pipeline = PiaoQuanPipeline(
|
|
|
- # platform=self.platform,
|
|
|
- # mode=self.mode,
|
|
|
- # item=video_dict,
|
|
|
- # rule_dict=self.rule_dict,
|
|
|
- # env=self.env,
|
|
|
- # trace_id=trace_id,
|
|
|
- # )
|
|
|
- # if not pipeline.repeat_video():
|
|
|
- # values = [[
|
|
|
- # obj['nickname'],
|
|
|
- # publish_time_str,
|
|
|
- # formatted_time,
|
|
|
- # int(obj['fav_count']),
|
|
|
- # int(obj['comment_count']),
|
|
|
- # int(obj['like_count']),
|
|
|
- # int(obj['forward_count']),
|
|
|
- # divisor_cnt,
|
|
|
- # video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
- # duration,
|
|
|
- # '否',
|
|
|
- # '重复视频',
|
|
|
- # video_obj.get('DownloadAddress')
|
|
|
- # ]]
|
|
|
- # Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
- # time.sleep(0.5)
|
|
|
- # Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
- # if count > 3:
|
|
|
- # return False
|
|
|
- # else:
|
|
|
- # return True
|
|
|
- # else:
|
|
|
- # values = [[
|
|
|
- # obj['nickname'],
|
|
|
- # publish_time_str,
|
|
|
- # formatted_time,
|
|
|
- # int(obj['fav_count']),
|
|
|
- # int(obj['comment_count']),
|
|
|
- # int(obj['like_count']),
|
|
|
- # int(obj['forward_count']),
|
|
|
- # divisor_cnt,
|
|
|
- # video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
- # duration,
|
|
|
- # '是',
|
|
|
- # '',
|
|
|
- # video_obj.get('DownloadAddress')
|
|
|
- # ]]
|
|
|
- # Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
- # time.sleep(0.5)
|
|
|
- # Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
- # video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
- # self.mq.send_msg(video_dict)
|
|
|
- # self.download_cnt += 1
|
|
|
- # AliyunLogger.logging(
|
|
|
- # code="1002",
|
|
|
- # platform=self.platform,
|
|
|
- # mode=self.mode,
|
|
|
- # env=self.env,
|
|
|
- # data=video_dict,
|
|
|
- # trace_id=trace_id,
|
|
|
- # message="成功发送 MQ 至 ETL",
|
|
|
- # )
|
|
|
- # time.sleep(5)
|
|
|
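+ # Skip videos shorter than 20 seconds (NOTE(review): the sheet label below still says 30 seconds; confirm the intended threshold)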
|
|
|
+ if duration < 20:
|
|
|
+ values = [[
|
|
|
+ obj['nickname'],
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ int(obj['fav_count']),
|
|
|
+ int(obj['comment_count']),
|
|
|
+ int(obj['like_count']),
|
|
|
+ int(obj['forward_count']),
|
|
|
+ divisor_cnt,
|
|
|
+ video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
+ duration,
|
|
|
+ '否',
|
|
|
+ '时长小于30秒',
|
|
|
+ video_obj.get('DownloadAddress')
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
+ return True
|
|
|
+ # Skip videos with fewer than 500 shares
|
|
|
+ if share_cnt < 500:
|
|
|
+ values = [[
|
|
|
+ obj['nickname'],
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ int(obj['fav_count']),
|
|
|
+ int(obj['comment_count']),
|
|
|
+ int(obj['like_count']),
|
|
|
+ int(obj['forward_count']),
|
|
|
+ divisor_cnt,
|
|
|
+ video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
+ duration,
|
|
|
+ '否',
|
|
|
+ '分享小于500',
|
|
|
+ video_obj.get('DownloadAddress')
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
+ return True
|
|
|
+ # Skip videos with at most 99999 shares whose share/like ratio (divisor_cnt) is below 2
|
|
|
+ if share_cnt <= 99999 and divisor_cnt < 2:
|
|
|
+ values = [[
|
|
|
+ obj['nickname'],
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ int(obj['fav_count']),
|
|
|
+ int(obj['comment_count']),
|
|
|
+ int(obj['like_count']),
|
|
|
+ int(obj['forward_count']),
|
|
|
+ divisor_cnt,
|
|
|
+ video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
+ duration,
|
|
|
+ '否',
|
|
|
+ f'分享小于100000,分享/点赞:{divisor_cnt}',
|
|
|
+ video_obj.get('DownloadAddress')
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
+ return True
|
|
|
+ pipeline = PiaoQuanPipeline(
|
|
|
+ platform=self.platform,
|
|
|
+ mode=self.mode,
|
|
|
+ item=video_dict,
|
|
|
+ rule_dict=self.rule_dict,
|
|
|
+ env=self.env,
|
|
|
+ trace_id=trace_id,
|
|
|
+ )
|
|
|
+ if not pipeline.repeat_video():
|
|
|
+ values = [[
|
|
|
+ obj['nickname'],
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ int(obj['fav_count']),
|
|
|
+ int(obj['comment_count']),
|
|
|
+ int(obj['like_count']),
|
|
|
+ int(obj['forward_count']),
|
|
|
+ divisor_cnt,
|
|
|
+ video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
+ duration,
|
|
|
+ '否',
|
|
|
+ '重复视频',
|
|
|
+ video_obj.get('DownloadAddress')
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
+ if count > 3:
|
|
|
+ return False
|
|
|
+ else:
|
|
|
+ return True
|
|
|
+ else:
|
|
|
+ values = [[
|
|
|
+ obj['nickname'],
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ int(obj['fav_count']),
|
|
|
+ int(obj['comment_count']),
|
|
|
+ int(obj['like_count']),
|
|
|
+ int(obj['forward_count']),
|
|
|
+ divisor_cnt,
|
|
|
+ video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
+ duration,
|
|
|
+ '是',
|
|
|
+ '',
|
|
|
+ video_obj.get('DownloadAddress')
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
+ video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
+ self.mq.send_msg(video_dict)
|
|
|
+ self.download_cnt += 1
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1002",
|
|
|
+ platform=self.platform,
|
|
|
+ mode=self.mode,
|
|
|
+ env=self.env,
|
|
|
+ data=video_dict,
|
|
|
+ trace_id=trace_id,
|
|
|
+ message="成功发送 MQ 至 ETL",
|
|
|
+ )
|
|
|
+ time.sleep(5)
|
|
|
return True
|
|
|
|
|
|
def video_duration(self, filename):
|