|
@@ -12,7 +12,6 @@
|
|
|
|
|
|
import json
|
|
|
import os
|
|
|
-import random
|
|
|
import sys
|
|
|
import time
|
|
|
import requests
|
|
@@ -21,6 +20,7 @@ sys.path.append(os.getcwd())
|
|
|
from main.common import Common
|
|
|
from main.get_feeds import get_feeds
|
|
|
from main.publish import Publish
|
|
|
+from main.feishu_lib import Feishu
|
|
|
|
|
|
proxies = {"http": None, "https": None}
|
|
|
|
|
@@ -51,179 +51,173 @@ class DownloadUp:
|
|
|
1.从 kanyikan_feeds.txt 中获取 videoid
|
|
|
2.根据 videoid,从 videoinfo 接口,获取当前视频最新的信息
|
|
|
3.根据下载规则判断,符合规则进行下载:
|
|
|
- 1 更新视频 ID 到 "./txt/kanyikan_videoid.txt"
|
|
|
+ 1 更新视频 ID 到 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
|
|
|
2 视频信息写入文件 "./videos/{d_title}/info.txt"
|
|
|
4.上传完成:
|
|
|
- 1 删除该视频在 "./txt/kanyikan_feeds.txt" 中的信息
|
|
|
+ 1 删除该视频在 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl 中的信息
|
|
|
"""
|
|
|
- get_video_info_session = Common.get_session()
|
|
|
- Common.crawler_log().info("获取视频info时,session:{}".format(get_video_info_session))
|
|
|
- lines = Common.read_txt("kanyikan_feeds.txt")
|
|
|
- for line in lines:
|
|
|
- v_time = line.strip().split(" + ")[0] # 第一次获取该视频的时间
|
|
|
- v_id = line.strip().split(" + ")[1] # 外网视频 ID
|
|
|
- v_play_ctn = line.strip().split(" + ")[2] # 播放量
|
|
|
- url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
|
|
|
- param = {
|
|
|
- "session": get_video_info_session,
|
|
|
- "vid": v_id,
|
|
|
- "wxaVersion": "3.9.2",
|
|
|
- "channelid": "208201",
|
|
|
- "scene": "32",
|
|
|
- "subscene": "1089",
|
|
|
- "model": "iPhone 11<iPhone12,1>14.7.1",
|
|
|
- "clientVersion": "8.0.18",
|
|
|
- "sharesearchid": "447665862521758270",
|
|
|
- "sharesource": "-1"
|
|
|
- }
|
|
|
- try:
|
|
|
- urllib3.disable_warnings()
|
|
|
- r = requests.get(url=url, params=param, proxies=proxies, verify=False)
|
|
|
- response = json.loads(r.content.decode("utf8"))
|
|
|
- if "data" not in response:
|
|
|
- Common.crawler_log().error("获取视频info时,session过期,等待30秒")
|
|
|
- # 如果返回空信息,则随机睡眠 31-35 秒
|
|
|
- time.sleep(random.randint(31, 35))
|
|
|
- else:
|
|
|
- data = response["data"]
|
|
|
- v_title = data["title"]
|
|
|
- v_duration = data["duration"]
|
|
|
- v_play_cnt_up = data["played_cnt"]
|
|
|
- v_comment_cnt = data["comment_cnt"]
|
|
|
- v_liked_cnt = data["liked_cnt"]
|
|
|
- v_shared_cnt = data["shared_cnt"]
|
|
|
- v_width = data["width"]
|
|
|
- v_height = data["height"]
|
|
|
- v_resolution = str(v_width) + "*" + str(v_height)
|
|
|
- v_send_date = data["upload_time"]
|
|
|
- v_username = data["user_info"]["nickname"]
|
|
|
- v_user_cover = data["user_info"]["headimg_url"]
|
|
|
- v_video_cover = data["cover_url"]
|
|
|
- if "items" not in data["play_info"]:
|
|
|
- if len(data["play_info"]) > 2:
|
|
|
- download_url_up = data["play_info"][2]["play_url"]
|
|
|
- else:
|
|
|
- download_url_up = data["play_info"][0]["play_url"]
|
|
|
+ if len(Feishu.get_values_batch("Y8N3Vl")) == 1:
|
|
|
+ pass
|
|
|
+ else:
|
|
|
+ for i in range(len(Feishu.get_values_batch("Y8N3Vl"))):
|
|
|
+ try:
|
|
|
+ video_info_session = Common.get_session()
|
|
|
+ Common.crawler_log().info("获取视频info时,session:{}".format(video_info_session))
|
|
|
+ download_time = Feishu.get_values_batch("Y8N3Vl")[i+1][0] # 第一次获取该视频的时间
|
|
|
+ download_video_id = Feishu.get_values_batch("Y8N3Vl")[i+1][1] # 外网视频 ID
|
|
|
+ download_video_play_cnt = Feishu.get_values_batch("Y8N3Vl")[i+1][2] # 播放量
|
|
|
+ download_video_title = Feishu.get_values_batch("Y8N3Vl")[i+1][3]
|
|
|
+
|
|
|
+ url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
|
|
|
+ param = {
|
|
|
+ "session": video_info_session,
|
|
|
+ "vid": download_video_id,
|
|
|
+ "wxaVersion": "3.9.2",
|
|
|
+ "channelid": "208201",
|
|
|
+ "scene": "32",
|
|
|
+ "subscene": "1089",
|
|
|
+ "model": "iPhone 11<iPhone12,1>14.7.1",
|
|
|
+ "clientVersion": "8.0.18",
|
|
|
+ "sharesearchid": "447665862521758270",
|
|
|
+ "sharesource": "-1"
|
|
|
+ }
|
|
|
+ urllib3.disable_warnings()
|
|
|
+ r = requests.get(url=url, params=param, proxies=proxies, verify=False)
|
|
|
+ response = json.loads(r.content.decode("utf8"))
|
|
|
+ if "data" not in response:
|
|
|
+ Common.crawler_log().error("获取视频info时错误,删除该视频:{}".format(download_video_title))
|
|
|
+ # 删除行或列,可选 ROWS、COLUMNS
|
|
|
+ Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
|
|
|
else:
|
|
|
- if len(data["play_info"]["items"]) > 2:
|
|
|
- download_url_up = data["play_info"]["items"][2]["play_url"]
|
|
|
+ data = response["data"]
|
|
|
+ v_duration = data["duration"]
|
|
|
+ v_play_cnt_up = data["played_cnt"]
|
|
|
+ v_comment_cnt = data["comment_cnt"]
|
|
|
+ v_liked_cnt = data["liked_cnt"]
|
|
|
+ v_shared_cnt = data["shared_cnt"]
|
|
|
+ v_width = data["width"]
|
|
|
+ v_height = data["height"]
|
|
|
+ v_resolution = str(v_width) + "*" + str(v_height)
|
|
|
+ v_send_date = data["upload_time"]
|
|
|
+ v_username = data["user_info"]["nickname"].strip().replace("\n", "")
|
|
|
+ v_user_cover = data["user_info"]["headimg_url"]
|
|
|
+ v_video_cover = data["cover_url"]
|
|
|
+ if "items" not in data["play_info"]:
|
|
|
+ if len(data["play_info"]) > 2:
|
|
|
+ download_url_up = data["play_info"][2]["play_url"]
|
|
|
+ else:
|
|
|
+ download_url_up = data["play_info"][0]["play_url"]
|
|
|
else:
|
|
|
- download_url_up = data["play_info"]["items"][0]["play_url"]
|
|
|
+ if len(data["play_info"]["items"]) > 2:
|
|
|
+ download_url_up = data["play_info"]["items"][2]["play_url"]
|
|
|
+ else:
|
|
|
+ download_url_up = data["play_info"]["items"][0]["play_url"]
|
|
|
+
|
|
|
+ # 判断基本规则
|
|
|
+ if cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is True \
|
|
|
+ and download_video_id != "" and download_video_title != "" and v_duration != "" \
|
|
|
+ and v_play_cnt_up != "" and v_comment_cnt != "" and v_liked_cnt != "" \
|
|
|
+ and v_shared_cnt != "" and v_width != "" and v_height != "" \
|
|
|
+ and v_send_date != "" and v_username != "" and v_user_cover != "" \
|
|
|
+ and v_video_cover != "" and download_url_up != "":
|
|
|
+ if int(time.time()) - int(download_time) < 3600:
|
|
|
+ Common.crawler_log().info("距上次获取该视频时间:{}分钟".format(
|
|
|
+ int((int(int(time.time()) - int(download_time))) / 60))
|
|
|
+ + ";{}".format(download_video_title))
|
|
|
+ elif 7200 >= int(time.time()) - int(download_time) >= 3600:
|
|
|
+ if int(v_play_cnt_up) - int(download_video_play_cnt) >= 1000:
|
|
|
+ Common.crawler_log().info("该视频:{}".format(
|
|
|
+ download_video_title) + " " + "在1小时内的播放量{}>=1000".format(
|
|
|
+ int(v_play_cnt_up) - int(download_video_play_cnt)))
|
|
|
|
|
|
- # 判断基本规则
|
|
|
- if cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is True \
|
|
|
- and v_id != "" and v_title != "" and v_duration != "" \
|
|
|
- and v_play_cnt_up != "" and v_comment_cnt != "" and v_liked_cnt != "" \
|
|
|
- and v_shared_cnt != "" and v_width != "" and v_height != "" \
|
|
|
- and v_send_date != "" and v_username != "" and v_user_cover != "" \
|
|
|
- and v_video_cover != "" and download_url_up != "":
|
|
|
- if int(time.time()) - int(v_time) < 3600:
|
|
|
- Common.crawler_log().info("距上次获取该视频时间:{}分钟".format(
|
|
|
- int((int(int(time.time()) - int(v_time))) / 60)) + ";{}".format(v_title))
|
|
|
- elif 7200 >= int(time.time()) - int(v_time) >= 3600:
|
|
|
- if int(v_play_cnt_up) - int(v_play_ctn) >= 1000:
|
|
|
- Common.crawler_log().info("该视频:{}".format(
|
|
|
- v_title) + " " + "在1小时内的播放量{}>=1000".format(int(v_play_cnt_up) - int(v_play_ctn)))
|
|
|
- # 下载封面
|
|
|
- Common.download_method("cover", v_title, v_video_cover)
|
|
|
- # 下载视频
|
|
|
- Common.download_method("video", v_title, download_url_up)
|
|
|
- # 保存视频 ID 到 "./txt/kanyikan_videoid.txt"
|
|
|
- with open(r"./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
|
|
|
- f_a.write(v_id + "\n")
|
|
|
- # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
|
|
|
- with open(r"./videos/" + v_title + "/" + "info.txt",
|
|
|
- "a", encoding="utf8") as f_a2:
|
|
|
- f_a2.write(str(v_id) + "\n" +
|
|
|
- str(v_title) + "\n" +
|
|
|
- str(v_duration) + "\n" +
|
|
|
- str(v_play_cnt_up) + "\n" +
|
|
|
- str(v_comment_cnt) + "\n" +
|
|
|
- str(v_liked_cnt) + "\n" +
|
|
|
- str(v_shared_cnt) + "\n" +
|
|
|
- str(v_resolution) + "\n" +
|
|
|
- str(v_send_date) + "\n" +
|
|
|
- str(v_username) + "\n" +
|
|
|
- str(v_user_cover) + "\n" +
|
|
|
- str(download_url_up) + "\n" +
|
|
|
- str(v_video_cover) + "\n" +
|
|
|
- str(get_video_info_session))
|
|
|
+ # 下载封面
|
|
|
+ Common.download_method("cover", download_video_title, v_video_cover)
|
|
|
+ # 下载视频
|
|
|
+ Common.download_method("video", download_video_title, download_url_up)
|
|
|
+ # Save the video info to "./videos/{video title}/info.txt"
|
|
|
+ with open(r"./videos/" + download_video_title
|
|
|
+ + "/" + "info.txt", "a", encoding="utf8") as f_a2:
|
|
|
+ f_a2.write(str(download_video_id) + "\n" +
|
|
|
+ str(download_video_title) + "\n" +
|
|
|
+ str(v_duration) + "\n" +
|
|
|
+ str(v_play_cnt_up) + "\n" +
|
|
|
+ str(v_comment_cnt) + "\n" +
|
|
|
+ str(v_liked_cnt) + "\n" +
|
|
|
+ str(v_shared_cnt) + "\n" +
|
|
|
+ str(v_resolution) + "\n" +
|
|
|
+ str(v_send_date) + "\n" +
|
|
|
+ str(v_username) + "\n" +
|
|
|
+ str(v_user_cover) + "\n" +
|
|
|
+ str(download_url_up) + "\n" +
|
|
|
+ str(v_video_cover) + "\n" +
|
|
|
+ str(video_info_session))
|
|
|
+
|
|
|
+ # 上传该视频
|
|
|
+ Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
|
|
|
+ Publish.upload_and_publish(env, "up")
|
|
|
|
|
|
- # 上传该视频
|
|
|
- Common.crawler_log().info("开始上传视频:{}".format(v_title))
|
|
|
- Publish.upload_and_publish(env, "up")
|
|
|
+ # 保存视频 ID 到云文档:
|
|
|
+ # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
|
|
|
+ Common.crawler_log().info("保存视频ID至云文档:{}".format(download_video_title))
|
|
|
+ # 看一看+ ,视频ID工作表,插入首行
|
|
|
+ Feishu.insert_columns("20ce0c")
|
|
|
+ # 看一看+ ,视频ID工作表,首行写入数据
|
|
|
+ Feishu.update_values("20ce0c", download_video_id, "", "", "",
|
|
|
+ "", "", "", "", "", "", "", "", "", "", "")
|
|
|
|
|
|
- # 删除该视频在kanyikan_feeds.txt中的信息
|
|
|
- Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
|
|
|
- with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f1:
|
|
|
- lines = f1.readlines()
|
|
|
- with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w1:
|
|
|
- for line1 in lines:
|
|
|
- if v_id in line1.split(" + ")[1]:
|
|
|
- continue
|
|
|
- f_w1.write(line1)
|
|
|
- else:
|
|
|
- # 删除之前保存的该视频信息,并把现在的信息保存进去
|
|
|
- Common.crawler_log().info("该视频1小时内的播放量:{}<1000".format(
|
|
|
- int(v_play_cnt_up) - int(v_play_ctn)) + ";"
|
|
|
- + "更新该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
|
|
|
- with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
|
|
|
- lines = f_r.readlines()
|
|
|
- with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
|
|
|
- for line2 in lines:
|
|
|
- if v_id in line2.split(" + ")[1]:
|
|
|
- continue
|
|
|
- f_w.write(line2)
|
|
|
- with open(r"./txt/kanyikan_feeds.txt", "a", encoding="utf-8") as f_a:
|
|
|
- f_a.write(str(int(time.time())) + " + "
|
|
|
- + str(v_id) + " + "
|
|
|
- + str(v_play_cnt_up) + " + "
|
|
|
- + str(v_title) + " + "
|
|
|
- + str(v_duration) + " + "
|
|
|
- + str(v_comment_cnt) + " + "
|
|
|
- + str(v_liked_cnt) + " + "
|
|
|
- + str(v_shared_cnt) + " + "
|
|
|
- + str(v_resolution) + " + "
|
|
|
- + str(v_send_date) + " + "
|
|
|
- + str(v_username) + " + "
|
|
|
- + str(v_user_cover) + " + "
|
|
|
- + str(v_video_cover) + " + "
|
|
|
- + str(download_url_up) + " + "
|
|
|
- + str(get_video_info_session) + "\n")
|
|
|
+ # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
|
|
|
+ Common.crawler_log().info("从云文档删除该视频信息:{}".format(download_video_title))
|
|
|
+ # 删除行或列,可选 ROWS、COLUMNS
|
|
|
+ Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
|
|
|
+ else:
|
|
|
+ # 删除之前保存的该视频信息,并把现在的信息保存进去
|
|
|
+ Common.crawler_log().info("该视频1小时内的播放量:{}<1000".format(
|
|
|
+ int(v_play_cnt_up) - int(download_video_play_cnt)
|
|
|
+ ) + ";" + "更新该视频在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
|
|
|
+ # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
|
|
|
+ Common.crawler_log().info("从云文档删除该视频信息:{}".format(download_video_title))
|
|
|
+ # 删除行或列,可选 ROWS、COLUMNS
|
|
|
+ Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
|
|
|
|
|
|
- elif int(time.time()) - int(v_time) > 7200:
|
|
|
- Common.crawler_log().info("距上次获取该视频时间:{}分钟。超过2小时,删除该视频".format(
|
|
|
- int((int(time.time()) - int(v_time)) / 60)) + ";" + "标题:{}".format(v_title))
|
|
|
- # 删除之前保存的该视频信息
|
|
|
- Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
|
|
|
- with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
|
|
|
- lines = f_r.readlines()
|
|
|
- with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
|
|
|
- for line2 in lines:
|
|
|
- if v_id in line2.split(" + ")[1]:
|
|
|
- continue
|
|
|
- f_w.write(line2)
|
|
|
- else:
|
|
|
- Common.crawler_log().info("不满足下载规则:{}".format(v_title))
|
|
|
- # 删除之前保存的该视频信息
|
|
|
- Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
|
|
|
- with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
|
|
|
- lines = f_r.readlines()
|
|
|
- with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
|
|
|
- for line3 in lines:
|
|
|
- if v_id in line3.split(" + ")[1]:
|
|
|
- continue
|
|
|
- f_w.write(line3)
|
|
|
- except Exception as e:
|
|
|
- Common.crawler_log().error("获取视频info异常:{},删除该视频".format(e))
|
|
|
- # 删除之前保存的该视频信息
|
|
|
- with open(r"./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
|
|
|
- lines = f_r.readlines()
|
|
|
- with open(r"./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
|
|
|
- for line4 in lines:
|
|
|
- if v_id in line4.split(" + ")[1]:
|
|
|
- continue
|
|
|
- f_w.write(line4)
|
|
|
+ # 看一看+工作表,插入首行
|
|
|
+ print(Feishu.insert_columns("Y8N3Vl"))
|
|
|
+
|
|
|
+ # 获取当前时间
|
|
|
+ download_up_time = int(time.time())
|
|
|
+ # 看一看云文档,工作表 kanyikan_feeds_1 中写入数据
|
|
|
+ Feishu.update_values("Y8N3Vl",
|
|
|
+ a1=str(download_up_time),
|
|
|
+ b1=str(download_video_id),
|
|
|
+ c1=str(v_play_cnt_up),
|
|
|
+ d1=str(download_video_title),
|
|
|
+ e1=str(v_duration),
|
|
|
+ f1=str(v_comment_cnt),
|
|
|
+ g1=str(v_liked_cnt),
|
|
|
+ h1=str(v_shared_cnt),
|
|
|
+ i1=str(v_resolution),
|
|
|
+ j1=str(v_send_date),
|
|
|
+ k1=str(v_username),
|
|
|
+ l1=str(v_user_cover),
|
|
|
+ m1=str(v_video_cover),
|
|
|
+ n1=str(download_url_up),
|
|
|
+ o1=str(video_info_session))
|
|
|
+ elif int(time.time()) - int(download_time) > 7200:
|
|
|
+ Common.crawler_log().info("距上次获取该视频时间:""{}分钟。超过2小时,删除该视频"
|
|
|
+ .format(int((int(time.time()) - int(download_time)) / 60))
|
|
|
+ + ";" + "标题:{}".format(download_video_title))
|
|
|
+ # 删除行或列,可选 ROWS、COLUMNS
|
|
|
+ Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
|
|
|
+ else:
|
|
|
+ Common.crawler_log().info("不满足下载规则:{}".format(download_video_title))
|
|
|
+ # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
|
|
|
+ Common.crawler_log().info("从云文档删除该视频信息:{}".format(download_video_title))
|
|
|
+ # 删除行或列,可选 ROWS、COLUMNS
|
|
|
+ Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
|
|
|
+ except Exception as e:
|
|
|
+ # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl
|
|
|
+ Common.crawler_log().error("获取视频info异常:{},删除该视频".format(e))
|
|
|
+ # 删除行或列,可选 ROWS、COLUMNS
|
|
|
+ Feishu.dimension_range("Y8N3Vl", "ROWS", i + 2, i + 2)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|