|
@@ -210,159 +210,181 @@ class ShiPinHaoAuthor(object):
|
|
|
# 获取当前时间
|
|
|
current_time = datetime.now()
|
|
|
formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
- video_dict = {
|
|
|
- "video_id": objectId,
|
|
|
- "video_title": clean_title(video_obj.get('title').split("\n")[0].split("#")[0]),
|
|
|
- "out_video_id": objectId,
|
|
|
- "publish_time_stamp": publish_time_stamp,
|
|
|
- "publish_time_str": publish_time_str,
|
|
|
- "play_cnt": 0,
|
|
|
- "fav_count": int(obj['fav_count']),
|
|
|
- "comment_cnt": int(obj['comment_count']),
|
|
|
- "like_cnt": like_cnt,
|
|
|
- "share_cnt": share_cnt,
|
|
|
- "user_id": self.user_dict["uid"],
|
|
|
- "cover_url": video_obj.get('thumb_url'),
|
|
|
- "video_url": video_url,
|
|
|
- "avatar_url": video_obj.get('thumb_url'),
|
|
|
- "width": video_obj.get('width'),
|
|
|
- "height": video_obj.get('height'),
|
|
|
- "duration": duration,
|
|
|
- "platform": self.platform,
|
|
|
- "strategy": self.mode,
|
|
|
- "crawler_rule": self.rule_dict,
|
|
|
- "session": f"shipinhao-author-{int(time.time())}",
|
|
|
- }
|
|
|
if share_cnt == 0:
|
|
|
divisor_cnt = 0
|
|
|
else:
|
|
|
divisor_cnt = int(share_cnt / like_cnt)
|
|
|
- # 视频时长小于30秒 返回
|
|
|
- if duration < 20:
|
|
|
- values = [[
|
|
|
- obj['nickname'],
|
|
|
- publish_time_str,
|
|
|
- formatted_time,
|
|
|
- int(obj['fav_count']),
|
|
|
- int(obj['comment_count']),
|
|
|
- int(obj['like_count']),
|
|
|
- int(obj['forward_count']),
|
|
|
- divisor_cnt,
|
|
|
- video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
- duration,
|
|
|
- '否',
|
|
|
- '时长小于30秒',
|
|
|
- video_obj.get('DownloadAddress')
|
|
|
- ]]
|
|
|
- Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
- time.sleep(0.5)
|
|
|
- Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
- return True
|
|
|
- # 分享小于1000 返回
|
|
|
- if share_cnt < 1000:
|
|
|
- values = [[
|
|
|
- obj['nickname'],
|
|
|
- publish_time_str,
|
|
|
- formatted_time,
|
|
|
- int(obj['fav_count']),
|
|
|
- int(obj['comment_count']),
|
|
|
- int(obj['like_count']),
|
|
|
- int(obj['forward_count']),
|
|
|
- divisor_cnt,
|
|
|
- video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
- duration,
|
|
|
- '否',
|
|
|
- '分享小于1000',
|
|
|
- video_obj.get('DownloadAddress')
|
|
|
- ]]
|
|
|
- Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
- time.sleep(0.5)
|
|
|
- Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
- return True
|
|
|
- # 分享小于等于99999
|
|
|
- if share_cnt <= 99999 and divisor_cnt < 2:
|
|
|
- values = [[
|
|
|
- obj['nickname'],
|
|
|
- publish_time_str,
|
|
|
- formatted_time,
|
|
|
- int(obj['fav_count']),
|
|
|
- int(obj['comment_count']),
|
|
|
- int(obj['like_count']),
|
|
|
- int(obj['forward_count']),
|
|
|
- divisor_cnt,
|
|
|
- video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
- duration,
|
|
|
- '否',
|
|
|
- f'分享小于100000,分享/点赞:{divisor_cnt}',
|
|
|
- video_obj.get('DownloadAddress')
|
|
|
- ]]
|
|
|
- Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
- time.sleep(0.5)
|
|
|
- Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
- return True
|
|
|
- pipeline = PiaoQuanPipeline(
|
|
|
- platform=self.platform,
|
|
|
- mode=self.mode,
|
|
|
- item=video_dict,
|
|
|
- rule_dict=self.rule_dict,
|
|
|
- env=self.env,
|
|
|
- trace_id=trace_id,
|
|
|
- )
|
|
|
- if not pipeline.repeat_video():
|
|
|
- values = [[
|
|
|
- obj['nickname'],
|
|
|
- publish_time_str,
|
|
|
- formatted_time,
|
|
|
- int(obj['fav_count']),
|
|
|
- int(obj['comment_count']),
|
|
|
- int(obj['like_count']),
|
|
|
- int(obj['forward_count']),
|
|
|
- divisor_cnt,
|
|
|
- video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
- duration,
|
|
|
- '否',
|
|
|
- '重复视频',
|
|
|
- video_obj.get('DownloadAddress')
|
|
|
- ]]
|
|
|
- Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
- time.sleep(0.5)
|
|
|
- Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
- if count > 3:
|
|
|
- return False
|
|
|
- else:
|
|
|
- return True
|
|
|
- else:
|
|
|
- values = [[
|
|
|
- obj['nickname'],
|
|
|
- publish_time_str,
|
|
|
- formatted_time,
|
|
|
- int(obj['fav_count']),
|
|
|
- int(obj['comment_count']),
|
|
|
- int(obj['like_count']),
|
|
|
- int(obj['forward_count']),
|
|
|
- divisor_cnt,
|
|
|
- video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
- duration,
|
|
|
- '是',
|
|
|
- '',
|
|
|
- video_obj.get('DownloadAddress')
|
|
|
- ]]
|
|
|
- Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
- time.sleep(0.5)
|
|
|
- Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
- video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
- self.mq.send_msg(video_dict)
|
|
|
- self.download_cnt += 1
|
|
|
- AliyunLogger.logging(
|
|
|
- code="1002",
|
|
|
- platform=self.platform,
|
|
|
- mode=self.mode,
|
|
|
- env=self.env,
|
|
|
- data=video_dict,
|
|
|
- trace_id=trace_id,
|
|
|
- message="成功发送 MQ 至 ETL",
|
|
|
- )
|
|
|
- time.sleep(5)
|
|
|
+ values = [[
|
|
|
+ obj['nickname'],
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ int(obj['fav_count']),
|
|
|
+ int(obj['comment_count']),
|
|
|
+ int(obj['like_count']),
|
|
|
+ int(obj['forward_count']),
|
|
|
+ divisor_cnt,
|
|
|
+ video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
+ duration,
|
|
|
+ '',
|
|
|
+ '',
|
|
|
+ video_obj.get('DownloadAddress')
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
+ # video_dict = {
|
|
|
+ # "video_id": objectId,
|
|
|
+ # "video_title": clean_title(video_obj.get('title').split("\n")[0].split("#")[0]),
|
|
|
+ # "out_video_id": objectId,
|
|
|
+ # "publish_time_stamp": publish_time_stamp,
|
|
|
+ # "publish_time_str": publish_time_str,
|
|
|
+ # "play_cnt": 0,
|
|
|
+ # "fav_count": int(obj['fav_count']),
|
|
|
+ # "comment_cnt": int(obj['comment_count']),
|
|
|
+ # "like_cnt": like_cnt,
|
|
|
+ # "share_cnt": share_cnt,
|
|
|
+ # "user_id": self.user_dict["uid"],
|
|
|
+ # "cover_url": video_obj.get('thumb_url'),
|
|
|
+ # "video_url": video_url,
|
|
|
+ # "avatar_url": video_obj.get('thumb_url'),
|
|
|
+ # "width": video_obj.get('width'),
|
|
|
+ # "height": video_obj.get('height'),
|
|
|
+ # "duration": duration,
|
|
|
+ # "platform": self.platform,
|
|
|
+ # "strategy": self.mode,
|
|
|
+ # "crawler_rule": self.rule_dict,
|
|
|
+ # "session": f"shipinhao-author-{int(time.time())}",
|
|
|
+ # }
|
|
|
+ # if share_cnt == 0:
|
|
|
+ # divisor_cnt = 0
|
|
|
+ # else:
|
|
|
+ # divisor_cnt = int(share_cnt / like_cnt)
|
|
|
+ # # 视频时长小于30秒 返回
|
|
|
+ # if duration < 20:
|
|
|
+ # values = [[
|
|
|
+ # obj['nickname'],
|
|
|
+ # publish_time_str,
|
|
|
+ # formatted_time,
|
|
|
+ # int(obj['fav_count']),
|
|
|
+ # int(obj['comment_count']),
|
|
|
+ # int(obj['like_count']),
|
|
|
+ # int(obj['forward_count']),
|
|
|
+ # divisor_cnt,
|
|
|
+ # video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
+ # duration,
|
|
|
+ # '否',
|
|
|
+ # '时长小于30秒',
|
|
|
+ # video_obj.get('DownloadAddress')
|
|
|
+ # ]]
|
|
|
+ # Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
+ # time.sleep(0.5)
|
|
|
+ # Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
+ # return True
|
|
|
+ # # 分享小于1000 返回
|
|
|
+ # if share_cnt < 1000:
|
|
|
+ # values = [[
|
|
|
+ # obj['nickname'],
|
|
|
+ # publish_time_str,
|
|
|
+ # formatted_time,
|
|
|
+ # int(obj['fav_count']),
|
|
|
+ # int(obj['comment_count']),
|
|
|
+ # int(obj['like_count']),
|
|
|
+ # int(obj['forward_count']),
|
|
|
+ # divisor_cnt,
|
|
|
+ # video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
+ # duration,
|
|
|
+ # '否',
|
|
|
+ # '分享小于1000',
|
|
|
+ # video_obj.get('DownloadAddress')
|
|
|
+ # ]]
|
|
|
+ # Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
+ # time.sleep(0.5)
|
|
|
+ # Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
+ # return True
|
|
|
+ # # 分享小于等于99999
|
|
|
+ # if share_cnt <= 99999 and divisor_cnt < 2:
|
|
|
+ # values = [[
|
|
|
+ # obj['nickname'],
|
|
|
+ # publish_time_str,
|
|
|
+ # formatted_time,
|
|
|
+ # int(obj['fav_count']),
|
|
|
+ # int(obj['comment_count']),
|
|
|
+ # int(obj['like_count']),
|
|
|
+ # int(obj['forward_count']),
|
|
|
+ # divisor_cnt,
|
|
|
+ # video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
+ # duration,
|
|
|
+ # '否',
|
|
|
+ # f'分享小于100000,分享/点赞:{divisor_cnt}',
|
|
|
+ # video_obj.get('DownloadAddress')
|
|
|
+ # ]]
|
|
|
+ # Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
+ # time.sleep(0.5)
|
|
|
+ # Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
+ # return True
|
|
|
+ # pipeline = PiaoQuanPipeline(
|
|
|
+ # platform=self.platform,
|
|
|
+ # mode=self.mode,
|
|
|
+ # item=video_dict,
|
|
|
+ # rule_dict=self.rule_dict,
|
|
|
+ # env=self.env,
|
|
|
+ # trace_id=trace_id,
|
|
|
+ # )
|
|
|
+ # if not pipeline.repeat_video():
|
|
|
+ # values = [[
|
|
|
+ # obj['nickname'],
|
|
|
+ # publish_time_str,
|
|
|
+ # formatted_time,
|
|
|
+ # int(obj['fav_count']),
|
|
|
+ # int(obj['comment_count']),
|
|
|
+ # int(obj['like_count']),
|
|
|
+ # int(obj['forward_count']),
|
|
|
+ # divisor_cnt,
|
|
|
+ # video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
+ # duration,
|
|
|
+ # '否',
|
|
|
+ # '重复视频',
|
|
|
+ # video_obj.get('DownloadAddress')
|
|
|
+ # ]]
|
|
|
+ # Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
+ # time.sleep(0.5)
|
|
|
+ # Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
+ # if count > 3:
|
|
|
+ # return False
|
|
|
+ # else:
|
|
|
+ # return True
|
|
|
+ # else:
|
|
|
+ # values = [[
|
|
|
+ # obj['nickname'],
|
|
|
+ # publish_time_str,
|
|
|
+ # formatted_time,
|
|
|
+ # int(obj['fav_count']),
|
|
|
+ # int(obj['comment_count']),
|
|
|
+ # int(obj['like_count']),
|
|
|
+ # int(obj['forward_count']),
|
|
|
+ # divisor_cnt,
|
|
|
+ # video_obj.get('title').split("\n")[0].split("#")[0],
|
|
|
+ # duration,
|
|
|
+ # '是',
|
|
|
+ # '',
|
|
|
+ # video_obj.get('DownloadAddress')
|
|
|
+ # ]]
|
|
|
+ # Feishu.insert_columns(self.platform, 'shipinhao', "Vq7NeH", "ROWS", 1, 2)
|
|
|
+ # time.sleep(0.5)
|
|
|
+ # Feishu.update_values(self.platform, 'shipinhao', "Vq7NeH", "A2:Z2", values)
|
|
|
+ # video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
+ # self.mq.send_msg(video_dict)
|
|
|
+ # self.download_cnt += 1
|
|
|
+ # AliyunLogger.logging(
|
|
|
+ # code="1002",
|
|
|
+ # platform=self.platform,
|
|
|
+ # mode=self.mode,
|
|
|
+ # env=self.env,
|
|
|
+ # data=video_dict,
|
|
|
+ # trace_id=trace_id,
|
|
|
+ # message="成功发送 MQ 至 ETL",
|
|
|
+ # )
|
|
|
+ # time.sleep(5)
|
|
|
return True
|
|
|
|
|
|
def video_duration(self, filename):
|