wangkun vor 2 Jahren
Ursprung
Commit
b00dc28c46

+ 8 - 3
README.md

@@ -10,9 +10,14 @@ requests==2.27.1
 urllib3==1.26.9
 
 #### 使用说明
-1. cd ./crawler-kanyikan-Windows
-2. python3 main/run_XXX.py
-3. videoid.txt 存储视频信息:
+测试调试:
+`sh kanyikan.sh --log_type="recommend" --crawler="kanyikan" --env="dev"`
+正式环境(crontab 定时任务):
+`* * * * * sh kanyikan.sh --log_type="recommend" --crawler="kanyikan" --env="prod"`
+杀进程:
+`ps aux | grep run_kanyikan | grep -v grep | awk '{print $2}' | xargs kill -9`
+
+videoid.txt 存储视频信息:
    - 视频 ID
    - 视频标题
    - 视频时长

+ 0 - 3
chlsfiles/__init__.py

@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/11/9

+ 61 - 18
kanyikan.sh

@@ -1,20 +1,63 @@
 #!/bin/bash
-echo "开始"
-echo "$(date "+%Y-%m-%d %H:%M:%S") 正在杀进程..."
-# shellcheck disable=SC2009
-# ps aux | grep run_kanyikan
-ps aux | grep run_kanyikan_moment.py | grep -v grep | awk '{print $2}' | xargs kill -9
-# shellcheck disable=SC2009
-ps aux | grep run_kanyikan_recommend.py | grep -v grep | awk '{print $2}' | xargs kill -9
-echo "$(date "+%Y-%m-%d %H:%M:%S") 进程已杀死!"
+#echo "开始"
+#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在杀进程..."
+## shellcheck disable=SC2009
+## ps aux | grep run_kanyikan
+#ps aux | grep run_kanyikan_moment.py | grep -v grep | awk '{print $2}' | xargs kill -9
+## shellcheck disable=SC2009
+#ps aux | grep run_kanyikan_recommend.py | grep -v grep | awk '{print $2}' | xargs kill -9
+#echo "$(date "+%Y-%m-%d %H:%M:%S") 进程已杀死!"
+#
+#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在更新代码..."
+#cd /Users/lieyunye/Desktop/crawler/crawler_kanyikan/ && git pull origin master --force
+#echo "$(date "+%Y-%m-%d %H:%M:%S") 代码更新完成!"
+##source /etc/profile
+#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启服务..."
+#nohup python3 -u main/run_kanyikan_recommend.py >>./nohup.log 2>&1 &
+#nohup python3 -u main/run_kanyikan_moment.py >>./nohup.log 2>&1 &
+#echo "$(date "+%Y-%m-%d %H:%M:%S") 服务重启完毕!"
+#echo "$(date "+%Y-%m-%d %H:%M:%S") 请手动启动 Charles,并开启 Auto save 功能;再启动 看一看+ 小程序!"
+#exit 0
 
-echo "$(date "+%Y-%m-%d %H:%M:%S") 正在更新代码..."
-cd /Users/lieyunye/Desktop/crawler/crawler_kanyikan/ && git pull origin master --force
-echo "$(date "+%Y-%m-%d %H:%M:%S") 代码更新完成!"
-#source /etc/profile
-echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启服务..."
-nohup python3 -u main/run_kanyikan_recommend.py >>./nohup.log 2>&1 &
-nohup python3 -u main/run_kanyikan_moment.py >>./nohup.log 2>&1 &
-echo "$(date "+%Y-%m-%d %H:%M:%S") 服务重启完毕!"
-echo "$(date "+%Y-%m-%d %H:%M:%S") 请手动启动 Charles,并开启 Auto save 功能;再启动 看一看+ 小程序!"
-exit 0
+
+# **********线下爬虫********** #
+log_type=$1
+crawler=$2
+env=$3          # 爬虫运行环境,正式环境: prod / 测试环境: dev
+#echo $env
+if [ ${env} = "--env=dev" ];then
+  crawler_dir=/Users/wangkun/Desktop/crawler/crawler_kanyikan/
+  profile_path=/etc/profile
+  log_path=${crawler_dir}logs/process-$(date +%Y-%m-%d).log
+else
+  crawler_dir=/Users/piaoquan/Desktop/crawler/crawler_kanyikan/
+  profile_path=./base_profile
+  log_path=${crawler_dir}logs/process-$(date +%Y-%m-%d).log
+fi
+
+time=$(date +%H:%M:%S)
+echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量..." >> ${log_path}
+cd ~ && source ${profile_path}
+echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成!" >> ${log_path}
+
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在检测看一看推荐爬虫服务状态" >> ${log_path}
+ps -ef | grep "run_kanyikan_recommend" | grep -v "grep"
+if [ "$?" -eq 1 ];then
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 异常停止,正在重启!" >> ${log_path}
+  cd ${crawler_dir}
+  nohup python3 -u  ${crawler_dir}main/run_kanyikan_recommend.py ${log_type} ${crawler} ${env} >> ${crawler_dir}logs/kanyikan-recommend-nohup.log 2>&1 &
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启服务完毕!" >> ${log_path}
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 看一看推荐爬虫进程状态正常" >> ${log_path}
+fi
+
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在检测看一看朋友圈爬虫服务状态" >> ${log_path}
+ps -ef | grep "run_kanyikan_moment" | grep -v "grep"
+if [ "$?" -eq 1 ];then
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 异常停止,正在重启!" >> ${log_path}
+  cd ${crawler_dir}
+  nohup python3 -u ${crawler_dir}main/run_kanyikan_moment.py >>${crawler_dir}logs/kanyikan-moment-nohup.log 2>&1 &
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启服务完毕!" >> ${log_path}
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 看一看朋友圈进程状态正常" >> ${log_path}
+fi

+ 0 - 82
main/demo.py

@@ -1,82 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/6/13
-import json
-import time
-
-import requests
-
-from main.feishu_lib import Feishu
-
-
-class Demo:
-
-    @classmethod
-    def get_video_info(cls, session, vid):
-        url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
-        param = {
-            "session": session,
-            "vid": vid,
-            "wxaVersion": "3.9.2",
-            "channelid": "208201",
-            "scene": "32",
-            "subscene": "1089",
-            "model": "iPhone 11<iPhone12,1>14.7.1",
-            "clientVersion": "8.0.18",
-            "sharesearchid": "447665862521758270",
-            "sharesource": "-1"
-        }
-        r = requests.get(url=url, params=param)
-        response = json.loads(r.content.decode("utf8"))
-        data = response["data"]
-        v_title = data["title"]
-        v_play_cnt = data["played_cnt"]
-        v_comment_cnt = data["comment_cnt"]
-        v_liked_cnt = data["liked_cnt"]
-        v_shared_cnt = data["shared_cnt"]
-        v_duration = data["duration"]
-        v_width = data["width"]
-        v_height = data["height"]
-        v_send_date = data["upload_time"]
-        v_username = data["user_info"]["nickname"].strip().replace("\n", "")
-        v_user_id = data["openid"]
-        v_user_cover = data["user_info"]["headimg_url"]
-        v_video_cover = data["cover_url"]
-        v_url = data["play_info"]["items"][-1]["play_url"]
-        print(f"v_title:{v_title}")
-        print(f"v_play_cnt:{v_play_cnt}")
-        print(f"v_liked_cnt:{v_liked_cnt}")
-        print(f"v_comment_cnt:{v_comment_cnt}")
-        print(f"v_shared_cnt:{v_shared_cnt}")
-        print(f"v_duration:{v_duration}")
-        print(f"v_url:{v_url}")
-
-    @classmethod
-    def strtime_to_int(cls):
-        download_time = "2022/06/13 17:21:01"
-        download_time = int(time.mktime(time.strptime(download_time, "%Y/%m/%d %H:%M:%S")))
-        print(download_time)
-
-    @classmethod
-    def demo1(cls):
-        a = 10
-        b = 20
-        lines = len(Feishu.get_values_batch("C8LQ1b"))
-        for i in range(1, lines):
-            print(f"i:{i}")
-            if i == a or i == b:
-                print(f"正在删除:{i+1}行")
-                Feishu.dimension_range("C8LQ1b", "ROWS", i+1, i+1)
-                lines = lines-1
-
-
-if __name__ == "__main__":
-    demo = Demo()
-
-    session = "LHacJPGEcMn7Cb0ocD_Z1BDyCkRbNjux4_HuHAwpydcHHdw9uwJl9pyiZgD__06uuDUwZ-_vqY5XQ_mS0XFz43xUYxwE_HsmxvluYfOVR5kXJmgjEVMlSDJ-R2IRr6ObHVZDaSLPV_i52pL0joEWP3FSHOZpXa72I9SPFUdQ1LRRGVvEhQBxMjuqvymnK5bv1HJwS_UoDIoue2nOrWmTCCAiR7YOUGnE8pwL0RLxsxSXU_fa3Ujgr9SbzFf3WhPo_QV92loTyXBTCJ8V8FsISqSKRlupG4n7osIyvEgtJEMS4Lt8K14Pc9i6X4e2EDUwoY66dOYbaoG5jaYgtdzKdYICRKnUK8jg_EAiSW7xzuorOLyoGDXlpL3AjVfivcys98Dg5b-BwTx7YhjUSbyVfDKrA5NFUY8cXJAJ2jWM6SSL91UNXVse7k4e8gMRGceC"
-    videoid = "ugc_xnl1c7"
-    demo.get_video_info(session, videoid)
-
-    # demo.strtime_to_int()
-    # demo.demo1()
-    # Feishu.insert_columns("C8LQ1b", "ROWS", 1, 2)

+ 0 - 9
main/feishu_lib.py

@@ -2,12 +2,9 @@
 # @Author: wangkun
 # @Time: 2022/5/6
 import json
-
 import requests
 import urllib3
-
 from main.common import Common
-
 proxies = {"http": None, "https": None}
 
 
@@ -300,10 +297,4 @@ class Feishu:
 if __name__ == "__main__":
     feishu = Feishu()
 
-    print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "B3:B3")[0])
-    print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "B4:B4")[0])
-    print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "C5:C5")[0][0]["link"])
-    print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "B6:B6")[0])
-    print(feishu.get_range_value("person", "xiaoniangao", "dzcWHw", "B7:B7")[0])
-
     pass

+ 238 - 0
main/kanyikan_recommend.py

@@ -0,0 +1,238 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/6/1
+import os
+import random
+import shutil
+import sys
+import time
+import requests
+import urllib3
+sys.path.append(os.getcwd())
+from main.common import Common
+from main.feishu_lib import Feishu
+from main.kanyikan_recommend_publish import Publish
+proxies = {"http": None, "https": None}
+
+
+class Kanyikanrecommend:
+    @classmethod
+    def get_filter_word(cls, log_type, crawler):
+        while True:
+            filter_sheet = Feishu.get_values_batch(log_type, crawler, "rofdM5")
+            if filter_sheet is None:
+                Common.logger(log_type).info(f"filter_sheet:{filter_sheet}")
+                time.sleep(1)
+                continue
+            # 敏感词库列表
+            word_list = []
+            for i in filter_sheet:
+                for j in i:
+                    # 过滤空的单元格内容
+                    if j is None:
+                        pass
+                    else:
+                        word_list.append(j)
+            return word_list
+
+    @classmethod
+    def download_rule(cls, video_dict):
+        if (int(video_dict["video_width"]) or int(video_dict["video_height"]) >= 720) \
+            and int(video_dict["duration"]) >= 40\
+            and (int(int(int(time.time()) - video_dict["publish_time_stamp"]) / (3600*24)) >= 7 and int(video_dict["play_cnt"]) >= 80000)\
+            and ((int(int(int(time.time()) - video_dict["publish_time_stamp"]) / (3600*24)) < 7 and int(video_dict["play_cnt"]) >= 20000) or (int(video_dict["publish_time_stamp"]) >= int(time.mktime(time.strptime("2021-06-01 00:00:00", "%Y-%m-%d %H:%M:%S"))))):
+            return True
+        else:
+            return False
+
+    @classmethod
+    def get_videoList(cls, log_type, crawler, env):
+        while True:
+            for page in range(1, 101):
+                Common.logger(log_type).info(f"正在抓取第{page}页")
+                try:
+                    session = Common.get_session(log_type)
+                    if session is None:
+                        time.sleep(1)
+                        continue
+                    url = 'https://search.weixin.qq.com/cgi-bin/recwxa/recwxavideolist?'
+                    header = {
+                        "Connection": "keep-alive",
+                        "content-type": "application/json",
+                        "Accept-Encoding": "gzip,compress,br,deflate",
+                        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
+                                      "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.18(0x18001236) "
+                                      "NetType/WIFI Language/zh_CN",
+                        "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/234/page-frame.html",
+                    }
+                    params = {
+                        'session': session,
+                        "offset": 0,
+                        "wxaVersion": "3.9.2",
+                        "count": "10",
+                        "channelid": "208",
+                        "scene": '310',
+                        "subscene": '1089',
+                        "clientVersion": '8.0.18',
+                        "sharesearchid": '0',
+                        "nettype": 'wifi',
+                        "switchprofile": "0",
+                        "switchnewuser": "0",
+                    }
+                    urllib3.disable_warnings()
+                    response = requests.get(url=url, headers=header, params=params, proxies=proxies, verify=False)
+                    if "data" not in response.text:
+                        Common.logger(log_type).info("获取视频list时,session过期,随机睡眠 31-50 秒")
+                        # 如果返回空信息,则随机睡眠 31-40 秒
+                        time.sleep(random.randint(31, 40))
+                        continue
+                    elif "items" not in response.json()["data"]:
+                        Common.logger(log_type).info(f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
+                        # 如果返回空信息,则随机睡眠 1-3 分钟
+                        time.sleep(random.randint(60, 180))
+                        continue
+                    feeds = response.json().get("data", {}).get("items", "")
+                    if feeds == "":
+                        Common.logger(log_type).info(f"feeds:{feeds}")
+                        time.sleep(random.randint(31, 40))
+                        continue
+                    for i in range(len(feeds)):
+                        try:
+                            video_title = feeds[i].get("title", "").strip().replace("\n", "") \
+                                .replace("/", "").replace("\\", "").replace("\r", "") \
+                                .replace(":", "").replace("*", "").replace("?", "") \
+                                .replace("?", "").replace('"', "").replace("<", "") \
+                                .replace(">", "").replace("|", "").replace(" ", "") \
+                                .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
+                                .replace("'", "").replace("#", "").replace("Merge", "")
+                            publish_time_stamp = feeds[i].get("date", 0)
+                            publish_time_str = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(publish_time_stamp))
+                            # 获取播放地址
+                            if "videoInfo" not in feeds[i]:
+                                video_url = ""
+                            elif "mpInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
+                                if len(feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2:
+                                    video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"]
+                                else:
+                                    video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"]
+                            elif "ctnInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
+                                video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
+                            else:
+                                video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
+                            video_dict = {
+                                "video_title": video_title,
+                                "video_id":  feeds[i].get("videoId", ""),
+                                "play_cnt":  feeds[i].get("playCount", 0),
+                                "like_cnt":  feeds[i].get("liked_cnt", 0),
+                                "comment_cnt":  feeds[i].get("comment_cnt", 0),
+                                "share_cnt":  feeds[i].get("shared_cnt", 0),
+                                "duration":  feeds[i].get("mediaDuration", 0),
+                                "video_width":  feeds[i].get("short_video_info", {}).get("width", 0),
+                                "video_height":  feeds[i].get("short_video_info", {}).get("height", 0),
+                                "publish_time_stamp":  publish_time_stamp,
+                                "publish_time_str":  publish_time_str,
+                                "user_name": feeds[i].get("source", "").strip().replace("\n", ""),
+                                "user_id": feeds[i].get("openid", ""),
+                                "avatar_url": feeds[i].get("bizIcon", ""),
+                                "cover_url": feeds[i].get("thumbUrl", ""),
+                                "video_url": video_url,
+                                "session": session,
+                            }
+                            for k, v in video_dict.items():
+                                Common.logger(log_type).info(f"{k}:{v}")
+
+                            if video_dict["video_id"] == "" \
+                                    or video_dict["video_title"] == ""\
+                                    or video_dict["video_url"] == "":
+                                Common.logger(log_type).info("无效视频\n")
+                            elif cls.download_rule(video_dict) is False:
+                                Common.logger(log_type).info("不满足抓取规则\n")
+                            elif any(str(word) if str(word) in video_title else False for word in cls.get_filter_word(log_type, crawler)) is True:
+                                Common.logger(log_type).info("视频已中过滤词\n")
+                            elif video_dict["video_id"] in [j for i in Feishu.get_values_batch(log_type, crawler, "ho98Ov") for j in i]:
+                                Common.logger(log_type).info("视频已下载\n")
+                            elif video_dict["video_id"] in [j for i in Feishu.get_values_batch(log_type, crawler, "20ce0c") for j in i]:
+                                Common.logger(log_type).info("视频已下载\n")
+                            else:
+                                cls.download_publish(log_type, crawler, video_dict, env)
+                        except Exception as e:
+                            Common.logger(log_type).error(f"抓取单条视频异常:{e}\n")
+                except Exception as e:
+                    Common.logger(log_type).error(f"抓取第{page}页时异常:{e}\n")
+
+    @classmethod
+    def download_publish(cls, log_type, crawler, video_dict, env):
+        Common.download_method(log_type, "video", video_dict["video_title"], video_dict["video_url"])
+        try:
+            if os.path.getsize(f"./videos/{video_dict['video_title']}/video.mp4") == 0:
+                # 删除视频文件夹
+                shutil.rmtree(f"./videos/{video_dict['video_title']}")
+                Common.logger(log_type).info("视频size=0,删除成功\n")
+                return
+        except FileNotFoundError:
+            # 删除视频文件夹
+            shutil.rmtree(f"./videos/{video_dict['video_title']}")
+            Common.logger(log_type).info("视频文件不存在,删除文件夹成功\n")
+            return
+        Common.download_method(log_type, "cover", video_dict["video_title"], video_dict["cover_url"])
+        with open(f"./videos/{video_dict['video_title']}/info.txt", "a", encoding="utf8") as f_a2:
+            f_a2.write(str(video_dict['video_id']) + "\n" +
+                       str(video_dict['video_title']) + "\n" +
+                       str(video_dict['duration']) + "\n" +
+                       str(video_dict['play_cnt']) + "\n" +
+                       str(video_dict['comment_cnt']) + "\n" +
+                       str(video_dict['like_cnt']) + "\n" +
+                       str(video_dict['share_cnt']) + "\n" +
+                       f'{video_dict["video_width"]}*{video_dict["video_height"]}' + "\n" +
+                       str(video_dict["publish_time_stamp"]) + "\n" +
+                       str(video_dict["user_name"]) + "\n" +
+                       str(video_dict["avatar_url"]) + "\n" +
+                       str(video_dict["video_url"]) + "\n" +
+                       str(video_dict["cover_url"]) + "\n" +
+                       str(video_dict["session"]))
+        Common.logger("recommend").info("==========视频信息已保存至info.txt==========")
+
+        # 上传视频
+        our_video_id = Publish.upload_and_publish(log_type, env, "recommend")
+        if env == "dev":
+            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        else:
+            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        if our_video_id is None:
+            try:
+                # 删除视频文件夹
+                shutil.rmtree(f"./videos/{video_dict['video_title']}")
+                return
+            except FileNotFoundError:
+                return
+
+        # 保存视频信息到云文档:
+        Feishu.insert_columns(log_type, crawler, "20ce0c", "ROWS", 1, 2)
+        # 看一看+ ,视频ID工作表,首行写入数据
+        upload_time = int(time.time())
+        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
+                   "推荐榜",
+                   str(video_dict["video_id"]),
+                   str(video_dict["video_title"]),
+                   our_video_link,
+                   video_dict["play_cnt"],
+                   video_dict["comment_cnt"],
+                   video_dict["like_cnt"],
+                   video_dict["share_cnt"],
+                   video_dict["duration"],
+                   f'{video_dict["video_width"]}*{video_dict["video_height"]}',
+                   video_dict["publish_time_str"],
+                   video_dict["user_name"],
+                   video_dict["user_id"],
+                   video_dict["avatar_url"],
+                   video_dict["cover_url"],
+                   video_dict["video_url"]]]
+        time.sleep(0.5)
+        Feishu.update_values(log_type, crawler, "20ce0c", "F2:Z2", values)
+        Common.logger(log_type).info("视频信息保存至云文档成功\n")
+
+
+if __name__ == "__main__":
+    print(Kanyikanrecommend.get_filter_word("recommend", "kanyikan"))
+    print(int(time.mktime(time.strptime("2021-06-01 00:00:00", "%Y-%m-%d %H:%M:%S"))))
+    pass

+ 0 - 255
main/kanyikan_recommend_feeds.py

@@ -1,255 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/4/18
-"""
-获取看一看+小程序,首页推荐视频列表
-"""
-import json
-import os
-import random
-import sys
-import time
-import requests
-import urllib3
-
-from main.feishu_lib import Feishu
-
-sys.path.append(os.getcwd())
-from main.common import Common
-
-proxies = {"http": None, "https": None}
-
-
-# 敏感词库
-def kanyikan_sensitive_words(log_type):
-    # 敏感词库列表
-    word_list = []
-    # 从云文档读取所有敏感词,添加到词库列表
-    lists = Feishu.get_values_batch(log_type, "kanyikan", "rofdM5")
-    for i in lists:
-        for j in i:
-            # 过滤空的单元格内容
-            if j is None:
-                pass
-            else:
-                word_list.append(j)
-    return word_list
-
-
-def get_recommend_feeds(log_type):
-    """
-    1.从看一看+小程序首页推荐,获取视频列表
-    2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c 中去重
-    3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM 中去重
-    4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-    """
-    Common.logger(log_type).info("开始从推荐页获取视频列表")
-    host = "https://search.weixin.qq.com"
-    url = '/cgi-bin/recwxa/recwxavideolist?'
-    video_list_session = Common.get_session(log_type)
-    # Common.logger(log_type).info("获取视频list时,session:{}", video_list_session)
-    header = {
-        "Connection": "keep-alive",
-        "content-type": "application/json",
-        "Accept-Encoding": "gzip,compress,br,deflate",
-        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
-                      "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.18(0x18001236) "
-                      "NetType/WIFI Language/zh_CN",
-        "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/234/page-frame.html",
-    }
-    params = {
-        'session': video_list_session,
-        "offset": 0,
-        "wxaVersion": "3.9.2",
-        "count": "10",
-        "channelid": "208",
-        "scene": '310',
-        "subscene": '1089',
-        "clientVersion": '8.0.18',
-        "sharesearchid": '0',
-        "nettype": 'wifi',
-        "switchprofile": "0",
-        "switchnewuser": "0",
-    }
-    try:
-        urllib3.disable_warnings()
-        r = requests.get(host + url, headers=header, params=params, proxies=proxies, verify=False)
-        response = json.loads(r.content.decode("utf8"))
-
-        if "data" not in response:
-            Common.logger(log_type).info("获取视频list时,session过期,随机睡眠 31-50 秒")
-            # 如果返回空信息,则随机睡眠 31-40 秒
-            time.sleep(random.randint(31, 40))
-            get_recommend_feeds(log_type)
-        elif "items" not in response["data"]:
-            Common.logger(log_type).info("获取视频list时,response:{},随机睡眠 1-3 分钟", response)
-            # 如果返回空信息,则随机睡眠 1-3 分钟
-            time.sleep(random.randint(60, 180))
-            get_recommend_feeds(log_type)
-        else:
-            items = response["data"]["items"]
-            for i in range(len(items)):
-                # 如果该视频没有视频信息,则忽略
-                if "videoInfo" not in items[i]:
-                    Common.logger(log_type).info("无视频信息")
-                else:
-                    # 获取视频标题
-                    video_title = items[i]["title"].strip().replace("\n", "")\
-                        .replace("/", "").replace("\\", "").replace("\r", "")\
-                        .replace(":", "").replace("*", "").replace("?", "")\
-                        .replace("?", "").replace('"', "").replace("<", "")\
-                        .replace(">", "").replace("|", "").replace(" ", "")\
-                        .replace("&NBSP", "").replace(".", "。").replace(" ", "")\
-                        .replace("小年糕", "").replace("#", "").replace("Merge", "")
-                    Common.logger(log_type).info('视频标题:{}', video_title)
-
-                    # 获取视频ID
-                    video_id = items[i]["videoId"]
-                    Common.logger(log_type).info('视频ID:{}', video_id)
-                
-                    # 获取视频播放次数
-                    video_play_cnt = items[i]["playCount"]
-                    Common.logger(log_type).info('视频播放次数:{}', video_play_cnt)
-                
-                    # 获取视频点赞数
-                    video_liked_cnt = items[i]["liked_cnt"]
-                    Common.logger(log_type).info('视频点赞数:{}', video_liked_cnt)
-                
-                    # 获取视频评论数
-                    video_comment_cnt = items[i]["comment_cnt"]
-                    Common.logger(log_type).info('视频评论数:{}', video_comment_cnt)
-                
-                    # 获取视频分享数
-                    video_shared_cnt = items[i]["shared_cnt"]
-                    Common.logger(log_type).info('视频分享数:{}', video_shared_cnt)
-
-                    # 获取视频时长
-                    video_duration = items[i]["mediaDuration"]
-                    Common.logger(log_type).info('视频时长:{}秒', video_duration)
-
-                    # 获取视频宽高
-                    if "short_video_info" not in items[i]:
-                        video_width = "0"
-                        video_height = "0"
-                        video_resolution = str(video_width) + "*" + str(video_height)
-                        Common.logger(log_type).info("无分辨率:{}", video_resolution)
-                    elif len(items[i]["short_video_info"]) == 0:
-                        video_width = "0"
-                        video_height = "0"
-                        video_resolution = str(video_width) + "*" + str(video_height)
-                        Common.logger(log_type).info("无分辨率:{}", video_resolution)
-
-                    else:
-                        # 视频宽
-                        video_width = items[i]["short_video_info"]["width"]
-                        # 视频高
-                        video_height = items[i]["short_video_info"]["height"]
-                        video_resolution = str(video_width) + "*" + str(video_height)
-                        Common.logger(log_type).info('视频宽高:{}', video_resolution)
-                
-                    # 获取视频发布时间
-                    video_send_date = items[i]["date"]
-                    Common.logger(log_type).info("视频发布时间:{}",
-                                                 time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(video_send_date)))
-                
-                    # 获取视频用户名
-                    video_user = items[i]["source"].strip().replace("\n", "")
-                    Common.logger(log_type).info('视频用户名:{}', video_user)
-
-                    # user_id
-                    if "openid" not in items[i]:
-                        user_id = 0
-                    else:
-                        user_id = items[i]["openid"]
-                
-                    # 获取视频用户头像
-                    video_user_cover = items[i]["bizIcon"]
-                    Common.logger(log_type).info('视频用户头像:{}', video_user_cover)
-                
-                    # 获取视频封面
-                    if "smartCoverUrl" in items[i]:
-                        video_cover = items[i]["smartCoverUrl"]
-                        Common.logger(log_type).info('视频封面:{}', video_cover)
-                    else:
-                        video_cover = items[i]["thumbUrl"]
-                        Common.logger(log_type).info('视频封面:{}', video_cover)
-                
-                    # 获取播放地址
-                    if "mpInfo" in items[i]["videoInfo"]["videoCdnInfo"].keys():
-                        if len(items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2:
-                            url = items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"]
-                            Common.logger(log_type).info('视频播放地址:{}', url)
-                        else:
-                            url = items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"]
-                            Common.logger(log_type).info('视频播放地址:{}', url)
-                    elif "ctnInfo" in items[i]["videoInfo"]["videoCdnInfo"]:
-                        url = items[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
-                        Common.logger(log_type).info('视频播放地址:{}', url)
-                    else:
-                        url = items[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
-                        Common.logger(log_type).info('视频播放地址:{}', url)
-
-                    # 过滤无效视频
-                    if video_id == "" \
-                            or video_send_date == "" \
-                            or video_title.strip() == "" \
-                            or video_play_cnt == "" \
-                            or video_liked_cnt == "" \
-                            or video_duration == "" \
-                            or video_comment_cnt == "" \
-                            or video_shared_cnt == "" \
-                            or video_user == "" \
-                            or video_user_cover == "" \
-                            or video_cover == "" \
-                            or url == "":
-                        Common.logger(log_type).info("无效视频")
-
-                    # 基础门槛,播放量>=20000
-                    elif int(video_play_cnt) < 10000:
-                        Common.logger(log_type).info("播放量{} < 20000", video_play_cnt)
-                    elif int(video_send_date) < 1659283200:
-                        Common.logger(log_type).info('发布时间{}<2022-08-01', video_send_date)
-                    # 过滤敏感词
-                    elif any(word if word in video_title else False
-                             for word in kanyikan_sensitive_words(log_type)) is True:
-                        Common.logger(log_type).info("视频已中敏感词:{}".format(video_title))
-                    # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
-                    elif video_id in [j for i in Feishu.get_values_batch(log_type, "kanyikan", "20ce0c") for j in i]:
-                        Common.logger(log_type).info("该视频已下载:{}", video_title)
-                    # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                    elif video_id in [j for i in Feishu.get_values_batch(log_type, "kanyikan", "SdCHOM") for j in i]:
-                        Common.logger(log_type).info("该视频已在kanyikan_feeds中:{}", video_title)
-                    else:
-                        Common.logger(log_type).info("该视频未下载,添加至kanyikan_feeds:{}", video_title)
-
-                        # 看一看+工作表,插入首行
-                        Feishu.insert_columns(log_type, "kanyikan", "SdCHOM", "ROWS", 1, 2)
-
-                        # 获取当前时间
-                        get_feeds_time = int(time.time())
-                        # 准备写入云文档的数据
-                        values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(get_feeds_time)),
-                                   "推荐榜",
-                                   video_id,
-                                   video_title,
-                                   video_play_cnt,
-                                   video_comment_cnt,
-                                   video_liked_cnt,
-                                   video_shared_cnt,
-                                   video_duration,
-                                   str(video_width) + "*" + str(video_height),
-                                   time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(video_send_date)),
-                                   video_user,
-                                   user_id,
-                                   video_user_cover,
-                                   video_cover,
-                                   url]]
-                        time.sleep(1)
-                        # 写入数据
-                        Feishu.update_values(log_type, "kanyikan", "SdCHOM", "A2:P2", values)
-    except Exception as e:
-        Common.logger(log_type).error("获取视频 list 时异常:{}", e)
-
-
-if __name__ == "__main__":
-    get_recommend_feeds("recommend")

+ 0 - 181
main/kanyikan_recommend_play.py

@@ -1,181 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/4/18
-
-"""
-下载并上传:播放量视频
-"""
-import os
-import sys
-import time
-
-sys.path.append(os.getcwd())
-from main.common import Common
-from main.kanyikan_recommend_feeds import get_recommend_feeds
-from main.kanyikan_recommend_publish import Publish
-from main.feishu_lib import Feishu
-
-
-class RecommendPlay:
-    # 下载规则
-    @staticmethod
-    def play_rule(play_width, play_height, play_duration, play_play_cnt):
-        """
-        Decide whether a video passes the play-count download rule.
-
-        Returns True only when all of the following hold, otherwise False:
-        1. Resolution: width or height is >= 720, or either one is the string "0".
-        2. Duration is >= 40.
-        3. Play count is >= 100000.
-
-        The numeric checks go through int(), so a value int() cannot convert raises.
-        """
-        resolution_ok = (
-            int(play_width) >= 720
-            or int(play_height) >= 720
-            or play_width == "0"
-            or play_height == "0"
-        )
-        if not resolution_ok:
-            return False
-        if int(play_duration) < 40:
-            return False
-        return int(play_play_cnt) >= 100000
-
-    @classmethod
-    def download_play_video(cls, env):
-        """
-        Download and upload one play-count video taken from the SdCHOM feed sheet.
-
-        The sheet is read once via Feishu.get_values_batch and scanned from index 1.
-        Every branch of the loop body ends in ``return``, so a single call handles
-        at most one row:
-          * the row fails play_rule, or its video id already appears in sheet
-            20ce0c -> the row is deleted from SdCHOM;
-          * otherwise -> cover and video are downloaded, info.txt is written,
-            Publish.upload_and_publish is called, a record is written to sheet
-            20ce0c and the row is deleted from SdCHOM.
-        Any exception is logged and row 2 of SdCHOM is deleted.
-
-        :param env: forwarded to Publish.upload_and_publish; "dev" is the test
-            environment and "prod" is production.
-        """
-        try:
-            recommend_feeds = Feishu.get_values_batch("recommend", "kanyikan", "SdCHOM")
-            # NOTE(review): the upper bound len(recommend_feeds)+1 lets i reach
-            # len(recommend_feeds), which would raise IndexError below; in practice
-            # every branch returns during the first iteration (i == 1).
-            for i in range(1, len(recommend_feeds)+1):
-                time.sleep(1)
-                # download_push_time = Feishu.get_values_batch("SdCHOM")[i][0]
-                # Columns 2..15 of the row hold the video fields read below.
-                download_video_id = recommend_feeds[i][2]
-                download_video_title = recommend_feeds[i][3]
-                download_video_play_cnt = recommend_feeds[i][4]
-                download_video_comment_cnt = recommend_feeds[i][5]
-                download_video_like_cnt = recommend_feeds[i][6]
-                download_video_share_cnt = recommend_feeds[i][7]
-                download_video_duration = recommend_feeds[i][8]
-                download_video_resolution = recommend_feeds[i][9]
-                # Resolution is stored as "<width>*<height>".
-                download_video_width = download_video_resolution.split("*")[0]
-                download_video_height = download_video_resolution.split("*")[-1]
-                download_video_send_time = recommend_feeds[i][10]
-                download_user_name = recommend_feeds[i][11]
-                download_user_id = recommend_feeds[i][12]
-                download_head_url = recommend_feeds[i][13]
-                download_cover_url = recommend_feeds[i][14]
-                download_video_url = recommend_feeds[i][15]
-
-                # Common.logger("recommend").info("download_video_id:{}", download_video_id)
-                # Common.logger("recommend").info("download_video_title:{}", download_video_title)
-                # Common.logger("recommend").info("download_video_play_cnt:{}", download_video_play_cnt)
-                # Common.logger("recommend").info("download_video_comment_cnt:{}", download_video_comment_cnt)
-                # Common.logger("recommend").info("download_video_like_cnt:{}", download_video_like_cnt)
-                # Common.logger("recommend").info("download_video_share_cnt:{}", download_video_share_cnt)
-                # Common.logger("recommend").info("download_video_duration:{}", download_video_duration)
-                # Common.logger("recommend").info("download_video_resolution:{}", download_video_resolution)
-                # Common.logger("recommend").info("download_video_send_time:{}", download_video_send_time)
-                # Common.logger("recommend").info("download_user_name:{}", download_user_name)
-                # Common.logger("recommend").info("download_user_id:{}", download_user_id)
-                # Common.logger("recommend").info("download_head_url:{}", download_head_url)
-                # Common.logger("recommend").info("download_cover_url:{}", download_cover_url)
-                # Common.logger("recommend").info("download_video_url:{}", download_video_url)
-
-                Common.logger("recommend").info("正在判断第{}行,视频:{}", i, download_video_title)
-
-                if cls.play_rule(
-                        play_width=download_video_width, play_height=download_video_height,
-                        play_duration=download_video_duration, play_play_cnt=download_video_play_cnt) is False:
-                    Common.logger("recommend").info("不满足播放量下载规则,删除该视频信息:{}", download_video_title)
-                    # Delete this video's row from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                    Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                    return
-                # De-duplicate: the id is searched in every cell of sheet 20ce0c,
-                # which is re-fetched for this check.
-                elif download_video_id in [j for m in Feishu.get_values_batch(
-                        "recommend", "kanyikan", "20ce0c") for j in m]:
-                    Common.logger("recommend").info("视频已下载,删除该视频信息:{}", download_video_title)
-                    # Delete this video's row from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                    Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                    return
-                else:
-                    Common.logger("recommend").info("开始下载视频:{}", download_video_title)
-                    # Download the cover
-                    Common.download_method(log_type="recommend", text="cover",
-                                           d_name=download_video_title, d_url=download_cover_url)
-                    # Download the video
-                    Common.download_method(log_type="recommend", text="video",
-                                           d_name=download_video_title, d_url=download_video_url)
-                    # Append the video info to "./videos/{download_video_title}/info.txt"
-                    with open(r"./videos/" + download_video_title
-                              + "/" + "info.txt", "a", encoding="utf8") as f_a:
-                        # One field per line; the send time is converted from
-                        # "%Y/%m/%d %H:%M:%S" text to an integer timestamp.
-                        f_a.write(str(download_video_id) + "\n" +
-                                  str(download_video_title) + "\n" +
-                                  str(download_video_duration) + "\n" +
-                                  str(download_video_play_cnt) + "\n" +
-                                  str(download_video_comment_cnt) + "\n" +
-                                  str(download_video_like_cnt) + "\n" +
-                                  str(download_video_share_cnt) + "\n" +
-                                  str(download_video_resolution) + "\n" +
-                                  str(int(time.mktime(
-                                      time.strptime(download_video_send_time, "%Y/%m/%d %H:%M:%S")))) + "\n" +
-                                  str(download_user_name) + "\n" +
-                                  str(download_head_url) + "\n" +
-                                  str(download_video_url) + "\n" +
-                                  str(download_cover_url) + "\n" +
-                                  str(Common.get_session("recommend")))
-                    Common.logger("recommend").info("==========视频信息已保存至info.txt==========")
-
-                    # Upload the video
-                    Common.logger("recommend").info("开始上传视频:{}", download_video_title)
-                    Publish.upload_and_publish("recommend", env, "play")
-
-                    # Save the video ID to the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
-                    Common.logger("recommend").info("保存视频ID至云文档:{}", download_video_title)
-                    # Video-ID worksheet: insert a new first data row
-                    Feishu.insert_columns("recommend", "kanyikan", "20ce0c", "ROWS", 1, 2)
-                    # Video-ID worksheet: write the record into that first data row
-                    upload_time = int(time.time())
-                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
-                               "播放量榜",
-                               str(download_video_id),
-                               str(download_video_title),
-                               download_video_play_cnt,
-                               download_video_comment_cnt,
-                               download_video_like_cnt,
-                               download_video_share_cnt,
-                               download_video_duration,
-                               str(download_video_resolution),
-                               str(download_video_send_time),
-                               str(download_user_name),
-                               str(download_user_id),
-                               str(download_head_url),
-                               str(download_cover_url),
-                               str(download_video_url)]]
-                    time.sleep(1)
-                    # NOTE(review): 16 values are written into the 17-column range
-                    # A2:Q2 - confirm the range is intended.
-                    Feishu.update_values("recommend", "kanyikan", "20ce0c", "A2:Q2", values)
-
-                    # Delete this video's row from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                    Common.logger("recommend").info("从云文档删除该视频信息:{}", download_video_title)
-                    # Delete rows or columns; the dimension may be ROWS or COLUMNS
-                    Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                    return
-        except Exception as e:
-            Common.logger("recommend").error("视频info异常:{}", e)
-            # On any failure drop row 2 (the first data row) of SdCHOM.
-            Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", 2, 2)
-            return
-
-    # 执行播放量下载及上传
-    @classmethod
-    def run_download_play_video(cls, env):
-        """
-        Call download_play_video repeatedly until the SdCHOM sheet has one row left.
-
-        The sheet is re-read before every pass. Any exception raised while
-        reading or processing is logged and ends the loop.
-
-        :param env: forwarded unchanged to download_play_video.
-        """
-        try:
-            while len(Feishu.get_values_batch("recommend", "kanyikan", "SdCHOM")) != 1:
-                cls.download_play_video(env)
-        except Exception as err:
-            Common.logger("recommend").error("执行上传及下载异常:{}", err)
-
-
-# Script entry point: fetch the recommend feeds, then run the play-count
-# download/upload loop with env "dev".
-if __name__ == "__main__":
-    download_play = RecommendPlay()
-    get_recommend_feeds("recommend")
-    download_play.run_download_play_video("dev")

+ 0 - 254
main/kanyikan_recommend_play_sendtime.py

@@ -1,254 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/6/17
-import json
-import os
-import sys
-import time
-import requests
-import urllib3
-sys.path.append(os.getcwd())
-from main.common import Common
-# from main.get_feeds import get_feeds
-from main.kanyikan_recommend_publish import Publish
-from main.feishu_lib import Feishu
-
-proxies = {"http": None, "https": None}
-
-
-class RecommendPlaySendtime:
-
-    # 抓取基础规则,写入recommend_feeds表
-    @staticmethod
-    def send_time_rule(send_time_width, send_time_height, send_time_duration, send_time_share_cnt):
-        """
-        Basic crawl rule applied before a video is considered for download.
-
-        Returns True only when all of the following hold, otherwise False:
-        1. Resolution: width or height is >= 720.
-        2. Duration is >= 60.
-        3. The count passed as send_time_share_cnt is > 0.
-
-        The checks go through int(), so a value int() cannot convert raises.
-        """
-        if not (int(send_time_width) >= 720 or int(send_time_height) >= 720):
-            return False
-        if int(send_time_duration) < 60:
-            return False
-        return int(send_time_share_cnt) > 0
-
-    @classmethod
-    def download_play_sendtime(cls, env):
-        """
-        Re-check one SdCHOM feed row against live video info, then download and upload it.
-
-        For the row being processed the video info is fetched from the
-        recwxagetonevideoinfo endpoint. The row is deleted from SdCHOM when the
-        video is invalid, was uploaded before the 1622476800 cutoff, fails
-        send_time_rule, has too few plays for its age (the code uses 80000 when
-        older than 7 days and 10000 otherwise), or its id is already present in
-        sheet 20ce0c or ho98Ov. Otherwise cover and video are downloaded,
-        info.txt is written, the video is published, and records are written to
-        sheet 20ce0c and to the monitor sheet 6fed97 before the row is deleted.
-        Any exception is logged and row 2 of SdCHOM is deleted.
-
-        :param env: forwarded to Publish.upload_and_publish; "dev" is the test
-            environment and "prod" is production.
-        :return: None
-        """
-        try:
-            recommend_feeds = Feishu.get_values_batch("recommend", "kanyikan", "SdCHOM")
-            # NOTE(review): the upper bound len(recommend_feeds)+1 lets i reach
-            # len(recommend_feeds), which would raise IndexError below.
-            for i in range(1, len(recommend_feeds)+1):
-                time.sleep(1)
-                sendtime_session = Common.get_session("recommend")
-                download_video_id = recommend_feeds[i][2]
-                download_video_title = recommend_feeds[i][3]
-                url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
-                param = {
-                    "session": sendtime_session,
-                    "vid": download_video_id,
-                    "wxaVersion": "3.9.2",
-                    "channelid": "208201",
-                    "scene": "32",
-                    "subscene": "1089",
-                    "model": "iPhone 11<iPhone12,1>14.7.1",
-                    "clientVersion": "8.0.18",
-                    "sharesearchid": "447665862521758270",
-                    "sharesource": "-1"
-                }
-                # The request is sent with certificate verification disabled, so
-                # the corresponding urllib3 warnings are silenced first.
-                urllib3.disable_warnings()
-                r = requests.get(url=url, params=param, proxies=proxies, verify=False)
-                response = json.loads(r.content.decode("utf8"))
-                if "data" not in response:
-                    Common.logger("recommend").error("获取视频info时错误,删除该视频:{}", download_video_title)
-                    # Delete rows or columns; the dimension may be ROWS or COLUMNS
-                    # NOTE(review): unlike the other branches there is no return
-                    # here, so the loop goes on with the sheet snapshot fetched
-                    # before this deletion - confirm row indices still line up.
-                    Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                else:
-                    data = response["data"]
-                    v_duration = data["duration"]
-                    v_play_cnt_sendtime = data["played_cnt"]
-                    v_comment_cnt = data["comment_cnt"]
-                    v_liked_cnt = data["liked_cnt"]
-                    v_shared_cnt = data["shared_cnt"]
-                    v_width = data["width"]
-                    v_height = data["height"]
-                    v_resolution = str(v_width) + "*" + str(v_height)
-                    v_send_date = data["upload_time"]
-                    v_username = data["user_info"]["nickname"].strip().replace("\n", "")
-                    v_user_id = data["openid"]
-                    v_user_cover = data["user_info"]["headimg_url"]
-                    v_video_cover = data["cover_url"]
-                    # play_info is handled both with and without an "items" key;
-                    # the third entry is used when more than two exist, else the first.
-                    if "items" not in data["play_info"]:
-                        if len(data["play_info"]) > 2:
-                            download_url_sendtime = data["play_info"][2]["play_url"]
-                        else:
-                            download_url_sendtime = data["play_info"][0]["play_url"]
-                    else:
-                        if len(data["play_info"]["items"]) > 2:
-                            download_url_sendtime = data["play_info"]["items"][2]["play_url"]
-                        else:
-                            download_url_sendtime = data["play_info"]["items"][0]["play_url"]
-
-                    Common.logger("recommend").info("正在判断第{}行,视频:{}", i, download_video_title)
-
-                    # Invalid-video check.
-                    # NOTE(review): `and` binds tighter than `or`, so this parses as
-                    #   id == "" or (id is None and title == "")
-                    #   or (title is None and <all remaining == "" tests>)
-                    # - confirm that grouping is what was intended.
-                    if download_video_id == "" \
-                            or download_video_id is None\
-                            and download_video_title == ""\
-                            or download_video_title is None\
-                            and v_duration == "" \
-                            and v_play_cnt_sendtime == ""\
-                            and v_send_date == ""\
-                            and v_user_cover == "" \
-                            and v_video_cover == ""\
-                            and download_url_sendtime == "":
-                        Common.logger("recommend").info("无效视频,删除该视频信息:{}", download_video_title)
-                        # Delete this video's row from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    # Upload time earlier than the fixed cutoff timestamp (the log
-                    # message describes it as June 2021).
-                    elif v_send_date < 1622476800:
-                        Common.logger("recommend").info(
-                            "发布时间小于2021年6月:{},{}", download_video_title, v_send_date)
-                        # Delete rows or columns; the dimension may be ROWS or COLUMNS
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    # Basic crawl rule; note the play count is passed as the
-                    # fourth argument (named send_time_share_cnt in send_time_rule).
-                    elif cls.send_time_rule(v_width, v_height, v_duration, v_play_cnt_sendtime) is False:
-                        Common.logger("recommend").info("不满足发布时间榜下载规则,删除该视频信息:{}", download_video_title)
-                        # Delete this video's row from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    # Rule not met: uploaded more than 7 days (604800 s) ago and play
-                    # count < 80000 (the log message below still quotes 150000).
-                    elif int(time.time()) - int(v_send_date) > 604800 and int(v_play_cnt_sendtime) < 80000:
-                        # Delete this video's row from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Common.logger("recommend").info("播放量:{} < 150000", int(v_play_cnt_sendtime))
-                        # Delete rows or columns; the dimension may be ROWS or COLUMNS
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    # Rule not met: uploaded within 7 days and play count < 10000
-                    # (the log message below still quotes 20000).
-                    elif int(time.time()) - int(v_send_date) <= 604800 and int(v_play_cnt_sendtime) < 10000:
-                        # Delete this video's row from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Common.logger("recommend").info("视频7天播放量:{} < 20000", int(v_play_cnt_sendtime))
-                        # Delete rows or columns; the dimension may be ROWS or COLUMNS
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    # De-duplicate against every cell of sheet 20ce0c.
-                    elif download_video_id in [j for m in Feishu.get_values_batch(
-                            "recommend", "kanyikan", "20ce0c") for j in m]:
-                        Common.logger("recommend").info("视频已下载,删除该视频信息:{}", download_video_title)
-                        # Delete this video's row from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    # De-duplicate against every cell of sheet ho98Ov.
-                    elif download_video_id in [j for m in Feishu.get_values_batch(
-                            "recommend", "kanyikan", "ho98Ov") for j in m]:
-                        Common.logger("recommend").info("视频已下载,删除该视频信息:{}", download_video_title)
-                        # Delete this video's row from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    else:
-                        Common.logger("recommend").info("开始下载视频:{}", download_video_title)
-
-                        # Download the cover
-                        Common.download_method("recommend", "cover", download_video_title, v_video_cover)
-                        # Download the video
-                        Common.download_method("recommend", "video", download_video_title, download_url_sendtime)
-                        # Append the video info to "./videos/{video title}/info.txt", one field per line
-                        with open(r"./videos/" + download_video_title +
-                                  "/" + "info.txt", "a", encoding="utf8") as f_a2:
-                            f_a2.write(str(download_video_id) + "\n" +
-                                       str(download_video_title) + "\n" +
-                                       str(v_duration) + "\n" +
-                                       str(v_play_cnt_sendtime) + "\n" +
-                                       str(v_comment_cnt) + "\n" +
-                                       str(v_liked_cnt) + "\n" +
-                                       str(v_shared_cnt) + "\n" +
-                                       str(v_resolution) + "\n" +
-                                       str(v_send_date) + "\n" +
-                                       str(v_username) + "\n" +
-                                       str(v_user_cover) + "\n" +
-                                       str(download_url_sendtime) + "\n" +
-                                       str(v_video_cover) + "\n" +
-                                       str(sendtime_session))
-                        Common.logger("recommend").info("==========视频信息已保存至info.txt==========")
-
-                        # Upload the video; the returned id is used to build the admin link
-                        Common.logger("recommend").info("开始上传视频:{}", download_video_title)
-                        our_video_id = Publish.upload_and_publish("recommend", env, "send_time")
-                        our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
-                        Common.logger("recommend").info("视频上传完成:{}", download_video_title)
-
-                        # Save the video ID to the cloud sheet:
-                        # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
-                        Common.logger("recommend").info("保存视频ID至云文档:{}", download_video_title)
-                        # Video-ID worksheet: insert a new first data row
-                        Feishu.insert_columns("recommend", "kanyikan", "20ce0c", "ROWS", 1, 2)
-                        # Video-ID worksheet: write the record into that first data row
-                        upload_time = int(time.time())
-                        values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
-                                   "推荐榜",
-                                   str(download_video_id),
-                                   str(download_video_title),
-                                   our_video_link,
-                                   int(v_play_cnt_sendtime),
-                                   v_comment_cnt,
-                                   v_liked_cnt,
-                                   v_shared_cnt,
-                                   v_duration,
-                                   v_resolution,
-                                   time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(v_send_date)),
-                                   str(v_username),
-                                   str(v_user_id),
-                                   str(v_user_cover),
-                                   str(v_video_cover),
-                                   str(download_url_sendtime)]]
-                        time.sleep(1)
-                        # NOTE(review): 17 values are written into the 18-column
-                        # range F2:W2 - confirm the range is intended.
-                        Feishu.update_values("recommend", "kanyikan", "20ce0c", "F2:W2", values)
-
-                        # Save the video info to the monitor sheet
-                        Common.logger("recommend").info("添加视频到监控表:{}", download_video_title)
-                        # Insert an empty row
-                        time.sleep(1)
-                        Feishu.insert_columns("recommend", "monitor", "6fed97", "ROWS", 1, 2)
-                        # Write the video info into the monitor sheet
-                        values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(upload_time))),
-                                   str(download_video_id),
-                                   download_video_title,
-                                   our_video_link,
-                                   v_duration,
-                                   time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(v_send_date)),
-                                   int(v_play_cnt_sendtime)]]
-                        time.sleep(1)
-                        Feishu.update_values("recommend", "monitor", "6fed97", "F2:L2", values)
-
-                        # Delete rows or columns; the dimension may be ROWS or COLUMNS
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-
-        except Exception as e:
-            Common.logger("recommend").error("获取视频info异常:{}", e)
-            # On any failure drop row 2 (the first data row) of SdCHOM.
-            Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", 2, 2)
-
-    # 执行上传及下载
-    @classmethod
-    def run_download_play_sendtime(cls, env):
-        """
-        Call download_play_sendtime repeatedly until the SdCHOM sheet has one row left.
-
-        The sheet is re-read before every pass. Any exception raised while
-        reading or processing is logged and ends the loop.
-
-        :param env: forwarded unchanged to download_play_sendtime.
-        """
-        try:
-            while len(Feishu.get_values_batch("recommend", "kanyikan", "SdCHOM")) != 1:
-                cls.download_play_sendtime(env)
-        except Exception as err:
-            Common.logger("recommend").error("执行上传及下载异常:{}", err)
-
-
-# Script entry point: run the download/upload loop with env "dev".
-if __name__ == "__main__":
-    download_sendtime = RecommendPlaySendtime()
-    download_sendtime.run_download_play_sendtime("dev")

+ 6 - 6
main/kanyikan_recommend_publish.py

@@ -144,13 +144,13 @@ class Publish:
     info_file = 'info'
     uids_dev_up = [6267140]
     uids_dev_play = [6267141]
-    uids_dev_send_time = [6267824]
+    uids_dev_recommend = [6267140, 6267141, 6267824]
     uids_prod_up = [20631208, 20631209, 20631210, 20631211, 20631212,
                     20631213, 20631214, 20631215, 20631216, 20631217]
     uids_prod_play = [20631208, 20631209, 20631210, 20631211, 20631212,
                       20631213, 20631214, 20631215, 20631216, 20631217,
                       20631223, 20631224, 20631225, 20631226, 20631227]
-    uids_prod_send_time = [20631208, 20631209, 20631210, 20631211, 20631212,
+    uids_prod_recommend = [20631208, 20631209, 20631210, 20631211, 20631212,
                            20631213, 20631214, 20631215, 20631216, 20631217,
                            20631223, 20631224, 20631225, 20631226, 20631227]
 
@@ -183,14 +183,14 @@ class Publish:
                         uid = str(random.choice(cls.uids_dev_up))
                     elif env == "dev" and job == "play":
                         uid = str(random.choice(cls.uids_dev_play))
-                    elif env == "dev" and job == "send_time":
-                        uid = str(random.choice(cls.uids_dev_send_time))
+                    elif env == "dev" and job == "recommend":
+                        uid = str(random.choice(cls.uids_dev_recommend))
                     elif env == "prod" and job == "up":
                         uid = str(random.choice(cls.uids_prod_up))
                     elif env == "prod" and job == "play":
                         uid = str(random.choice(cls.uids_prod_play))
-                    elif env == "prod" and job == "send_time":
-                        uid = str(random.choice(cls.uids_prod_send_time))
+                    elif env == "prod" and job == "recommend":
+                        uid = str(random.choice(cls.uids_prod_recommend))
                     data['loginUid'] = uid
                     # 单个视频文件夹下的所有视频文件
                     for fi in dir_files:

+ 0 - 233
main/kanyikan_recommend_sendtime.py

@@ -1,233 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/4/18
-"""
-下载并上传:发布时间榜
-规则:
-    1.基本规则:send_time_rule()
-    2.视频发布3日内,播放量大于2万(当前时间 - 发布时间 <= 3 天)
-"""
-import json
-import os
-import sys
-import time
-import requests
-import urllib3
-
-sys.path.append(os.getcwd())
-from main.common import Common
-from main.kanyikan_recommend_feeds import get_recommend_feeds
-from main.kanyikan_recommend_publish import Publish
-from main.feishu_lib import Feishu
-
-proxies = {"http": None, "https": None}
-
-
-class RecommendSendtime:
-
-    @staticmethod
-    def send_time_rule(send_time_width, send_time_height, send_time_duration, send_time_share_cnt):
-        """
-        Basic rule for the send-time ranking download.
-
-        Returns True only when all of the following hold, otherwise False:
-        1. Resolution: width or height is >= 720, or either one is the string "0".
-        2. Duration is >= 40.
-        3. The count passed as send_time_share_cnt is > 0.
-
-        The numeric checks go through int(), so a value int() cannot convert raises.
-        """
-        resolution_ok = (
-            int(send_time_width) >= 720
-            or int(send_time_height) >= 720
-            or send_time_width == "0"
-            or send_time_height == "0"
-        )
-        if not resolution_ok:
-            return False
-        if int(send_time_duration) < 40:
-            return False
-        return int(send_time_share_cnt) > 0
-
-    @classmethod
-    def download_sendtime_video(cls, env):
-        """
-        视频发布3日内,播放量大于2万(当前时间 - 发布时间 <= 3 天)
-        :param env: 测试环境:dev;正式环境:prod
-        :return: 下载并上传视频
-        """
-        try:
-            recommend_feeds = Feishu.get_values_batch("recommend", "kanyikan", "SdCHOM")
-            for i in range(1, len(recommend_feeds)+1):
-                time.sleep(1)
-                sendtime_session = Common.get_session("recommend")
-                download_video_id = recommend_feeds[i][2]
-                download_video_title = recommend_feeds[i][3]
-                url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
-                param = {
-                    "session": sendtime_session,
-                    "vid": download_video_id,
-                    "wxaVersion": "3.9.2",
-                    "channelid": "208201",
-                    "scene": "32",
-                    "subscene": "1089",
-                    "model": "iPhone 11<iPhone12,1>14.7.1",
-                    "clientVersion": "8.0.18",
-                    "sharesearchid": "447665862521758270",
-                    "sharesource": "-1"
-                }
-                urllib3.disable_warnings()
-                r = requests.get(url=url, params=param, proxies=proxies, verify=False)
-                response = json.loads(r.content.decode("utf8"))
-                if "data" not in response:
-                    Common.logger("recommend").error("获取视频info时错误,删除该视频:{}", download_video_title)
-                    # 删除行或列,可选 ROWS、COLUMNS
-                    Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                else:
-                    data = response["data"]
-                    v_duration = data["duration"]
-                    v_play_cnt_sendtime = data["played_cnt"]
-                    v_comment_cnt = data["comment_cnt"]
-                    v_liked_cnt = data["liked_cnt"]
-                    v_shared_cnt = data["shared_cnt"]
-                    v_width = data["width"]
-                    v_height = data["height"]
-                    v_resolution = str(v_width) + "*" + str(v_height)
-                    v_send_date = data["upload_time"]
-                    v_username = data["user_info"]["nickname"].strip().replace("\n", "")
-                    v_user_id = data["openid"]
-                    v_user_cover = data["user_info"]["headimg_url"]
-                    v_video_cover = data["cover_url"]
-                    if "items" not in data["play_info"]:
-                        if len(data["play_info"]) > 2:
-                            download_url_sendtime = data["play_info"][2]["play_url"]
-                        else:
-                            download_url_sendtime = data["play_info"][0]["play_url"]
-                    else:
-                        if len(data["play_info"]["items"]) > 2:
-                            download_url_sendtime = data["play_info"]["items"][2]["play_url"]
-                        else:
-                            download_url_sendtime = data["play_info"]["items"][0]["play_url"]
-
-                    Common.logger("recommend").info("正在判断第{}行,视频:{}", i, download_video_title)
-
-                    # 判断无效视频
-                    if download_video_id == "" \
-                            or download_video_id is None\
-                            and download_video_title == ""\
-                            or download_video_title is None\
-                            and v_duration == "" \
-                            and v_play_cnt_sendtime == ""\
-                            and v_send_date == ""\
-                            and v_user_cover == "" \
-                            and v_video_cover == ""\
-                            and download_url_sendtime == "":
-                        Common.logger("recommend").info("无效视频,删除该视频信息:{}", download_video_title)
-                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    # 发布时间榜下载规则
-                    elif cls.send_time_rule(v_width, v_height, v_duration, v_play_cnt_sendtime) is False:
-                        Common.logger("recommend").info("不满足发布时间榜下载规则,删除该视频信息:{}", download_video_title)
-                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    # 发布时间 <=7 天
-                    elif int(time.time()) - int(v_send_date) > 604800:
-                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Common.logger("recommend").info("视频发布时间大于7天:{}天;标题:{}",
-                                                        int((int(time.time()) - int(v_send_date)) / 86400),
-                                                        download_video_title)
-                        # 删除行或列,可选 ROWS、COLUMNS
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    elif int(v_play_cnt_sendtime) < 20000:
-                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Common.logger("recommend").info("该视频7天播放量:{}<10000 ;不满足下载规则:{}",
-                                                        int(v_play_cnt_sendtime), download_video_title)
-                        # 删除行或列,可选 ROWS、COLUMNS
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    elif download_video_id in [j for m in Feishu.get_values_batch(
-                            "recommend", "kanyikan", "20ce0c") for j in m]:
-                        Common.logger("recommend").info("视频已下载,删除该视频信息:{}", download_video_title)
-                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-                    else:
-                        Common.logger("recommend").info("开始下载视频:{}", download_video_title)
-
-                        # 下载封面
-                        Common.download_method("recommend", "cover", download_video_title, v_video_cover)
-                        # 下载视频
-                        Common.download_method("recommend", "video", download_video_title, download_url_sendtime)
-                        # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
-                        with open(r"./videos/" + download_video_title +
-                                  "/" + "info.txt", "a", encoding="utf8") as f_a2:
-                            f_a2.write(str(download_video_id) + "\n" +
-                                       str(download_video_title) + "\n" +
-                                       str(v_duration) + "\n" +
-                                       str(v_play_cnt_sendtime) + "\n" +
-                                       str(v_comment_cnt) + "\n" +
-                                       str(v_liked_cnt) + "\n" +
-                                       str(v_shared_cnt) + "\n" +
-                                       str(v_resolution) + "\n" +
-                                       str(v_send_date) + "\n" +
-                                       str(v_username) + "\n" +
-                                       str(v_user_cover) + "\n" +
-                                       str(download_url_sendtime) + "\n" +
-                                       str(v_video_cover) + "\n" +
-                                       str(sendtime_session))
-                        Common.logger("recommend").info("==========视频信息已保存至info.txt==========")
-
-                        # 上传该视频
-                        Common.logger("recommend").info("开始上传视频:{}", download_video_title)
-                        Publish.upload_and_publish("recommend", env, "send_time")
-
-                        # 保存视频 ID 到云文档:
-                        # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
-                        Common.logger("recommend").info("保存视频ID至云文档:{}", download_video_title)
-                        # 看一看+ ,视频ID工作表,插入首行
-                        Feishu.insert_columns("recommend", "kanyikan", "20ce0c", "ROWS", 1, 2)
-                        # 看一看+ ,视频ID工作表,首行写入数据
-                        upload_time = int(time.time())
-                        values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
-                                   "发布时间榜",
-                                   str(download_video_id),
-                                   str(download_video_title),
-                                   v_play_cnt_sendtime,
-                                   v_comment_cnt,
-                                   v_liked_cnt,
-                                   v_shared_cnt,
-                                   v_duration,
-                                   v_resolution,
-                                   time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(v_send_date)),
-                                   str(v_username),
-                                   str(v_user_id),
-                                   str(v_user_cover),
-                                   str(v_video_cover),
-                                   str(download_url_sendtime)]]
-                        time.sleep(1)
-                        Feishu.update_values("recommend", "kanyikan", "20ce0c", "A2:Q2", values)
-
-                        # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Common.logger("recommend").info("从云文档删除该视频信息:{}", download_video_title)
-                        # 删除行或列,可选 ROWS、COLUMNS
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                        return
-
-        except Exception as e:
-            Common.logger("recommend").error("获取视频info异常:{}", e)
-
-    # Run download + upload repeatedly until the "SdCHOM" feed sheet has exactly one row left
-    @classmethod
-    def run_download_sendtime_video(cls):
-        try:
-            while True:
-                if len(Feishu.get_values_batch("recommend", "kanyikan", "SdCHOM")) == 1:  # presumably only the header row remains — TODO confirm
-                    break
-                else:
-                    cls.download_sendtime_video("prod")  # env is hard-coded to "prod" here
-        except Exception as e:
-            Common.logger("recommend").error("执行上传及下载异常:{}", e)  # any exception ends the loop; it is logged, not re-raised
-
-
-if __name__ == "__main__":  # manual run: refresh the recommend feeds, then call download_sendtime_video once with env "dev"
-    download_sendtime = RecommendSendtime()
-    get_recommend_feeds("recommend")
-    download_sendtime.download_sendtime_video("dev")

+ 0 - 269
main/kanyikan_recommend_up.py

@@ -1,269 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/4/18
-
-"""
-下载并上传:上升榜视频
-规则:
-    1.满足基本规则
-    2.每隔一小时,检查视频播放量,>=1000,则下载及上传
-    3.超过 2 小时,则删除该视频信息
-"""
-
-import json
-import os
-import sys
-import time
-import requests
-import urllib3
-sys.path.append(os.getcwd())
-from main.common import Common
-from main.kanyikan_recommend_feeds import get_recommend_feeds
-from main.kanyikan_recommend_publish import Publish
-from main.feishu_lib import Feishu
-
-proxies = {"http": None, "https": None}
-
-
-class RecommendUp:  # rules and download/upload pipeline for the "上升榜" (rising list) videos
-
-    @staticmethod
-    def up_rule(up_width, up_height, up_duration, up_play_cnt):
-        """
-        1. Resolution: width or height >= 720, or either one equals "0"
-        2. Duration >= 40 (the code enforces no upper bound)
-        3. Play count >= 0
-        """
-        if int(up_width) >= 720 or int(up_height) >= 720 or str(up_width) == "0" or str(up_height) == "0":
-            if int(up_duration) >= 40:
-                if int(up_play_cnt) >= 0:
-                    return True
-                else:
-                    return False
-            else:
-                return False
-        else:
-            return False
-
-    @classmethod
-    def download_up_video(cls, env):
-        """
-        Process the candidate rows of Feishu sheet "SdCHOM" against the rising-list ("up") rules.
-        1. Read the rows via Feishu.get_values_batch; per row, fetch fresh info from recwxagetonevideoinfo.
-        2. Rows failing a check (invalid, older than 2 hours, up_rule, already in "20ce0c") are deleted.
-        3. Rows 1-2 hours old with play-count growth >= 1000 are downloaded, saved to
-           "./videos/{title}/info.txt", uploaded, recorded in sheet "20ce0c" and deleted from "SdCHOM".
-        4. Rows 1-2 hours old with growth < 1000 are re-inserted at the top of "SdCHOM" with fresh values.
-        :param env: forwarded to Publish.upload_and_publish; exceptions are logged, not re-raised.
-        """
-        try:
-            recommend_feeds = Feishu.get_values_batch("recommend", "kanyikan", "SdCHOM")  # one snapshot; NOTE(review): rows deleted below shift the live sheet, so later "i + 1" may target another row — confirm
-            for i in range(1, len(recommend_feeds)+1):  # NOTE(review): last i == len(recommend_feeds), so recommend_feeds[i] is out of range (IndexError for a list, caught by the outer except)
-                time.sleep(1)
-                video_info_session = Common.get_session("recommend")
-                download_time = recommend_feeds[i][0]  # time this video was first fetched
-                download_time = int(time.mktime(time.strptime(download_time, "%Y/%m/%d %H:%M:%S")))
-                download_video_id = recommend_feeds[i][2]  # external video ID
-                download_video_title = recommend_feeds[i][3]  # video title
-                download_video_play_cnt = recommend_feeds[i][4]  # play count
-
-                url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
-                param = {
-                    "session": video_info_session,
-                    "vid": download_video_id,
-                    "wxaVersion": "3.9.2",
-                    "channelid": "208201",
-                    "scene": "32",
-                    "subscene": "1089",
-                    "model": "iPhone 11<iPhone12,1>14.7.1",
-                    "clientVersion": "8.0.18",
-                    "sharesearchid": "447665862521758270",
-                    "sharesource": "-1"
-                }
-                urllib3.disable_warnings()
-                r = requests.get(url=url, params=param, proxies=proxies, verify=False)  # certificate verification is disabled and no timeout is passed
-                response = json.loads(r.content.decode("utf8"))
-                if "data" not in response:
-                    Common.logger("recommend").error("获取视频info时错误,删除该视频:{}", download_video_title)
-                    # delete rows or columns; accepts ROWS or COLUMNS
-                    Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-                else:
-                    data = response["data"]
-                    v_duration = data["duration"]
-                    v_play_cnt_up = data["played_cnt"]
-                    v_comment_cnt = data["comment_cnt"]
-                    v_liked_cnt = data["liked_cnt"]
-                    v_shared_cnt = data["shared_cnt"]
-                    v_width = data["width"]
-                    v_height = data["height"]
-                    v_resolution = str(v_width) + "*" + str(v_height)
-                    v_send_date = data["upload_time"]
-                    v_username = data["user_info"]["nickname"].strip().replace("\n", "")
-                    v_user_id = data["openid"]
-                    v_user_cover = data["user_info"]["headimg_url"]
-                    v_video_cover = data["cover_url"]
-                    if "items" not in data["play_info"]:  # play_info is indexed directly here, or through its "items" entry in the else branch
-                        if len(data["play_info"]) > 2:  # use the third entry when more than two exist, otherwise the first
-                            download_url_up = data["play_info"][2]["play_url"]
-                        else:
-                            download_url_up = data["play_info"][0]["play_url"]
-                    else:
-                        if len(data["play_info"]["items"]) > 2:
-                            download_url_up = data["play_info"]["items"][2]["play_url"]
-                        else:
-                            download_url_up = data["play_info"]["items"][0]["play_url"]
-
-                    Common.logger("recommend").info("正在判断第{}行,视频:{}", i, download_video_title)
-
-                    # invalid-video check; NOTE(review): "and" binds tighter than "or", so this is not a flat "any field is empty" test — confirm intent
-                    if download_video_id == "" \
-                            or download_video_id is None\
-                            and download_video_title == ""\
-                            or download_video_title is None\
-                            and v_duration == "" \
-                            and v_play_cnt_up == ""\
-                            and v_send_date == ""\
-                            and v_user_cover == "" \
-                            and v_video_cover == ""\
-                            and download_url_up == "":
-                        Common.logger("recommend").info("无效视频,删除该视频信息:{}", download_video_title)
-                        # delete this video's info from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-
-                    # on the rising list for less than 1 hour: only log, keep the row
-                    elif int(time.time()) - int(download_time) < 3600:
-                        Common.logger("recommend").info("距上次获取该视频时间:{}分钟;{}",
-                                                        int((int(int(time.time()) - int(download_time))) / 60),
-                                                        download_video_title)
-
-                    # on the rising list for more than 2 hours: delete the row
-                    elif int(time.time()) - int(download_time) > 7200:
-                        Common.logger("recommend").info("距上次获取该视频时间:""{}分钟。超过2小时,删除该视频;标题:{}",
-                                                        int((int(time.time()) - int(download_time)) / 60),
-                                                        download_video_title)
-                        # delete rows or columns; accepts ROWS or COLUMNS
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-
-                    # rising-list download rule (up_rule)
-                    elif cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is False:
-                        Common.logger("recommend").info("不满足上升榜下载规则,删除视频:{}", download_video_title)
-                        # delete rows or columns; accepts ROWS or COLUMNS
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-
-                    # de-duplicate against the downloaded-videos sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
-                    elif download_video_id in [j for m in Feishu.get_values_batch(
-                            "recommend", "kanyikan", "20ce0c") for j in m]:
-                        Common.logger("recommend").info("视频已下载,删除该视频信息:{}", download_video_title)
-                        # delete this video's info from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-
-                    # on the rising list for 1-2 hours and play-count growth >= 1000: download and upload
-                    elif (7200 >= int(time.time()) - int(download_time) >= 3600)\
-                            and (int(v_play_cnt_up) - int(download_video_play_cnt) >= 1000):
-                        Common.logger("recommend").info(
-                            "视频:{},在上升榜时间内的播放量{}>=1000,开始下载视频",
-                            download_video_title, int(v_play_cnt_up) - int(download_video_play_cnt))
-
-                        # download the cover
-                        Common.download_method("recommend", "cover", download_video_title, v_video_cover)
-                        # download the video
-                        Common.download_method("recommend", "video", download_video_title, download_url_up)
-                        # save the video info to "./videos/{video title}/info.txt" (opened in append mode)
-                        with open(r"./videos/" + download_video_title
-                                  + "/" + "info.txt", "a", encoding="utf8") as f_a2:
-                            f_a2.write(str(download_video_id) + "\n" +
-                                       str(download_video_title) + "\n" +
-                                       str(v_duration) + "\n" +
-                                       str(v_play_cnt_up) + "\n" +
-                                       str(v_comment_cnt) + "\n" +
-                                       str(v_liked_cnt) + "\n" +
-                                       str(v_shared_cnt) + "\n" +
-                                       str(v_resolution) + "\n" +
-                                       str(v_send_date) + "\n" +
-                                       str(v_username) + "\n" +
-                                       str(v_user_cover) + "\n" +
-                                       str(download_url_up) + "\n" +
-                                       str(v_video_cover) + "\n" +
-                                       str(video_info_session))
-                        Common.logger("recommend").info("==========视频信息已保存至info.txt==========")
-
-                        # upload this video
-                        Common.logger("recommend").info("开始上传视频:{}", download_video_title)
-                        Publish.upload_and_publish("recommend", env, "up")
-
-                        # save the video ID to the cloud sheet:
-                        # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
-                        Common.logger("recommend").info("保存视频ID至云文档:{}", download_video_title)
-                        # Kanyikan+ video-ID sheet: insert a new first row
-                        Feishu.insert_columns("recommend", "kanyikan", "20ce0c", "ROWS", 1, 2)
-                        # Kanyikan+ video-ID sheet: write the data into that first row
-                        upload_time = int(time.time())
-                        values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
-                                   "上升榜",
-                                   str(download_video_id),
-                                   str(download_video_title),
-                                   v_play_cnt_up,
-                                   v_comment_cnt,
-                                   v_liked_cnt,
-                                   v_shared_cnt,
-                                   v_duration,
-                                   v_resolution,
-                                   time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(v_send_date)),
-                                   str(v_username),
-                                   str(v_user_id),
-                                   str(v_user_cover),
-                                   str(v_video_cover),
-                                   str(download_url_up)]]
-                        time.sleep(1)
-                        Feishu.update_values("recommend", "kanyikan", "20ce0c", "A2:Q2", values)  # NOTE(review): 16 values are written to a 17-column range (A..Q) — confirm
-
-                        # delete this video's info from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Common.logger("recommend").info("从云文档删除该视频信息:{}", download_video_title)
-                        # delete rows or columns; accepts ROWS or COLUMNS
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-
-                    # on the rising list for 1-2 hours and play-count growth < 1000: refresh the row
-                    elif (7200 >= int(time.time()) - int(download_time) >= 3600)\
-                            and (int(v_play_cnt_up) - int(download_video_play_cnt) < 1000):
-                        # delete the previously saved info for this video and store the current info instead
-                        Common.logger("recommend").info("该视频1小时内的播放量:{}<1000;更新该视频信息:{}",
-                                                        int(v_play_cnt_up) - int(download_video_play_cnt),
-                                                        download_video_title)
-                        # delete this video's info from the cloud sheet: https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
-                        Common.logger("recommend").info("从云文档删除该视频信息:{}", download_video_title)
-                        # delete rows or columns; accepts ROWS or COLUMNS
-                        Feishu.dimension_range("recommend", "kanyikan", "SdCHOM", "ROWS", i + 1, i + 1)
-
-                        # Kanyikan+ feed sheet: insert a new first row
-                        Feishu.insert_columns("recommend", "kanyikan", "SdCHOM", "ROWS", 1, 2)
-
-                        # current time becomes the row's new fetch time
-                        download_up_time = int(time.time())
-                        values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(download_up_time)),
-                                   "上升榜",
-                                   str(download_video_id),
-                                   str(download_video_title),
-                                   v_play_cnt_up,
-                                   v_comment_cnt,
-                                   v_liked_cnt,
-                                   v_shared_cnt,
-                                   v_duration,
-                                   v_resolution,
-                                   time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(v_send_date)),
-                                   str(v_username),
-                                   str(v_user_id),
-                                   str(v_user_cover),
-                                   str(v_video_cover),
-                                   str(download_url_up)]]
-                        time.sleep(1)
-                        Feishu.update_values("recommend", "kanyikan", "SdCHOM", "A2:Q2", values)  # NOTE(review): 16 values are written to a 17-column range (A..Q) — confirm
-
-        except Exception as e:
-            # the exception is only logged here; no sheet row is deleted and the remaining rows are not processed
-            Common.logger("recommend").error("获取视频info异常:{}", e)
-
-
-if __name__ == "__main__":  # manual run: refresh the recommend feeds, then call download_up_video once with env "dev"
-    downloadup = RecommendUp()
-    get_recommend_feeds("recommend")
-    downloadup.download_up_video("dev")

+ 19 - 22
main/run_kanyikan_recommend.py

@@ -1,35 +1,32 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
-# @Time: 2022/6/17
+# @Time: 2023/6/1
+import argparse
 import datetime
 import os
-import random
 import sys
 import time
 sys.path.append(os.getcwd())
 from main.common import Common
-from main.kanyikan_recommend_feeds import get_recommend_feeds
-from main.kanyikan_recommend_play_sendtime import RecommendPlaySendtime
+from main.kanyikan_recommend import Kanyikanrecommend
 
 
-class Main:  # previous entry point: an endless in-process crawl loop
-    @classmethod
-    def prod_job(cls):
-        while True:  # never exits; each pass of the inner loop runs at most one crawl round
-            while True:
-                prod_job_time = datetime.datetime.now()
-                if 1 >= prod_job_time.hour >= 0:  # local hour is 0 or 1: skip crawling
-                    break  # NOTE(review): no sleep on this path, so the outer loop spins during hours 0-1
-                else:
-                    Common.logger("recommend").info("开始抓取看一看+推荐列表")
-                    get_recommend_feeds("recommend")
-                    RecommendPlaySendtime.run_download_play_sendtime("prod")  # env is hard-coded to "prod"
-                    Common.del_logs("recommend")
-                    Common.del_charles_files("recommend")
-                    time.sleep(random.randint(3, 5))  # pause 3-5 seconds before the next round
-                    break
+def main(log_type, crawler, env):  # runs one crawl round; log_type goes to the Common helpers, all three arguments go to Kanyikanrecommend.get_videoList
+    if 1 >= datetime.datetime.now().hour >= 0:  # local hour is 0 or 1: skip this round
+        time.sleep(1)
+        pass
+    else:
+        Common.logger(log_type=log_type).info("开始抓取看一看+推荐列表")
+        Kanyikanrecommend.get_videoList(log_type=log_type, crawler=crawler, env=env)
+        Common.del_logs(log_type=log_type)  # clean-up calls run after each crawl round
+        Common.del_charles_files(log_type=log_type)
+        Common.logger(log_type).info("抓取完一轮\n")
 
 
 if __name__ == "__main__":
-    main = Main()
-    main.prod_job()
+    parser = argparse.ArgumentParser()  ## create the command-line argument parser
+    parser.add_argument('--log_type', default="recommend")  ## --log_type option, defaults to "recommend"
+    parser.add_argument('--crawler', default="kanyikan")  ## --crawler option, defaults to "kanyikan"
+    parser.add_argument('--env')  ## --env option; no default is given, so argparse leaves it as None when omitted
+    args = parser.parse_args()  ### read the option values supplied on the command line
+    main(log_type=args.log_type, crawler=args.crawler, env=args.env)