пре 2 година · 766295af54
--- a/README.MD
+++ b/README.MD
@@ -62,85 +62,6 @@ ps aux | grep run_youtube
 
															 ps aux | grep run_youtube | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															 ```
														
 
															-
														
 
															-#### 西瓜视频
														
 
															-```commandline
														
 
															-阿里云 102 服务器
														
 
															-西瓜定向: sh ./main/main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="follow" --crawler="xigua" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" xigua/nohup.log
														
 
															-西瓜推荐: /usr/bin/sh ./main/scheduling_main.sh ./xigua/xigua_main/run_xigua_recommend.py --log_type="recommend" --crawler="xigua" --env="prod" xigua/logs/nohup-recommend.log
														
 
															-本机
														
 
															-西瓜定向: sh ./main/main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="follow" --crawler="xigua" --strategy="定向爬虫策略" --oss_endpoint="out" --env="prod" --machine="local" xigua/nohup.log
														
 
															-西瓜推荐: sh ./main/scheduling_main.sh ./xigua/xigua_main/run_xigua_recommend.py --log_type="recommend" --crawler="xigua" --env="dev" xigua/logs/nohup-recommend.log
														
 
															-西瓜搜索: sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_search_new.py --log_type="search" --crawler="xigua" --env="dev" xigua/logs/search-shell.log
														
 
															-杀进程命令：
														
 
															-ps aux | grep run_xigua
														
 
															-ps aux | grep run_xigua | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep run_xigua_follow | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep run_xigua_recommend | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep run_xigua_search | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-```
														
 
															-
														
 
															-#### 快手
														
 
															-```commandline
														
 
															-阿里云 102 服务器
														
 
															-sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_recommend.py --log_type="recommend" --crawler="kuaishou" --strategy="推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/recommend.log
														
 
															-sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/follow.log
														
 
															-# sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --env="prod" --machine="aliyun" kuaishou/nohup.log
														
 
															-本机
														
 
															-sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --oss_endpoint="out" --env="dev" --machine="local" kuaishou/nohup.log
														
 
															-# sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --env="dev" --machine="local" kuaishou/nohup.log
														
 
															-macpro
														
 
															-sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --oss_endpoint="out" --env="prod" --machine="macpro" kuaishou/nohup.log
														
 
															-# sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --env="prod" --machine="macpro" kuaishou/nohup.log
														
 
															-杀进程命令：
														
 
															-ps aux | grep run_kuaishou
														
 
															-ps aux | grep run_kuaishou | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep Appium.app | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-```
														
 
															-
														
 
															-#### 小年糕
														
 
															-```commandline
														
 
															-阿里云 102 服务器
														
 
															-定向爬虫策略: /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="follow" --crawler="xiaoniangao" --env="prod"  xiaoniangao/nohup-follow.log
														
 
															-小时榜爬虫策略: /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="prod" xiaoniangao/nohup-hour.log
														
 
															-播放量榜爬虫策略: /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="prod" xiaoniangao/nohup-play.log
														
 
															-
														
 
															-线下调试
														
 
															-定向爬虫策略: sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="follow" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-follow.log
														
 
															-小时榜爬虫策略: sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-hour.log
														
 
															-播放量榜爬虫策略: sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-play.log
														
 
															-
														
 
															-nohup python3 -u xiaoniangao/xiaoniangao_follow/insert_video_1.py >> xiaoniangao/nohup-1.log 2>&1 &
														
 
															-nohup python3 -u xiaoniangao/xiaoniangao_follow/insert_video_2.py >> xiaoniangao/nohup-1.log 2>&1 &
														
 
															-nohup python3 -u xiaoniangao/xiaoniangao_follow/insert_video_3.py >> xiaoniangao/nohup-1.log 2>&1 &
														
 
															-
														
 
															-杀进程命令
														
 
															-ps aux | grep run_xiaoniangao_follow
														
 
															-ps aux | grep run_xiaoniangao_hour
														
 
															-ps aux | grep run_xiaoniangao_play
														
 
															-ps aux | grep run_xiaoniangao | grep -v grep | awk '{print $2}' | xargs kill -9 
														
 
															-ps aux | grep run_xiaoniangao_follow | grep -v grep | awk '{print $2}' | xargs kill -9 
														
 
															-ps aux | grep run_xiaoniangao_hour | grep -v grep | awk '{print $2}' | xargs kill -9 
														
 
															-ps aux | grep run_xiaoniangao_play | grep -v grep | awk '{print $2}' | xargs kill -9 
														
 
															-```
														
 
															-
														
 
															-#### 公众号
														
 
															-```commandline
														
 
															-阿里云 102 服务器
														
 
															-定向爬虫策略: 
														
 
															-/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow.log
														
 
															-/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-2.log
														
 
															-/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py --log_type="follow-3" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-3.log
														
 
															-线下调试
														
 
															-定向爬虫策略: 
														
 
															-sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow.log
														
 
															-sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow-2.log
														
 
															-sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py --log_type="follow-3" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow-3.log
														
 
															-杀进程命令
														
 
															-ps aux | grep run_gongzhonghao
														
 
															-ps aux | grep run_gongzhonghao | grep -v grep | awk '{print $2}' | xargs kill -9 
														
 
															-```
														
 
															-
														
 
															 #### 微信指数
														
 
															 ```commandline
														
 
															 获取站外标题, crontab定时脚本, 每天 12:00:00 点运行一次
														
@@ -165,43 +86,6 @@ ps aux | grep 微信 | grep -v grep | awk '{print $2}' | xargs kill -9
 
															 ```
														
 
															-#### 抖音
														
 
															-```commandline
														
 
															-阿里云 102 服务器
														
 
															-sh ./main/main.sh ./douyin/douyin_main/run_douyin_recommend.py --log_type="recommend" --crawler="douyin" --strategy="推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" douyin/recommend.log
														
 
															-# sh ./main/main.sh ./kuaishou/douyin_main/run_douyin_recommend.py --log_type="recommend" --crawler="douyin" --strategy="定向爬策策略" --env="prod" --machine="aliyun" kuaishou/nohup.log
														
 
															-本机
														
 
															-
														
 
															-#### 爬虫进程监测
														
 
															-```commandline
														
 
															-阿里云 102 服务器：/usr/bin/sh /data5/piaoquan_crawler/main/process.sh "prod"
														
 
															-香港 服务器:/usr/bin/sh /root/piaoquan_crawler/main/process.sh "hk"
														
 
															-线下调试：sh /Users/wangkun/Desktop/crawler/piaoquan_crawler/main/process.sh "dev"
														
 
															-```
														
 
															-
														
 
															-
														
 
															-#### 本山祝福小程序
														
 
															-```commandline
														
 
															-阿里云 102 服务器
														
 
															-/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py --log_type="recommend" --crawler="benshanzhufu" --env="prod"  ./benshanzhufu/logs/nohup-recommend.log
														
 
															-线下调试
														
 
															-sh ./main/scheduling_main.sh ./benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py --log_type="recommend" --crawler="benshanzhufu" --env="dev"  ./benshanzhufu/logs/nohup-recommend.log
														
 
															-检测进程
														
 
															-ps aux | grep run_benshanzhufu
														
 
															-ps aux | grep run_benshanzhufu | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-```
														
 
															-
														
 
															-#### 岁岁年年迎福气小程序
														
 
															-```commandline
														
 
															-阿里云 102 服务器
														
 
															-/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py --log_type="recommend" --crawler="suisuiniannianyingfuqi" --env="prod"  ./suisuiniannianyingfuqi/logs/nohup-recommend.log
														
 
															-线下调试
														
 
															-sh ./main/scheduling_main.sh ./suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py --log_type="recommend" --crawler="suisuiniannianyingfuqi" --env="dev"  ./suisuiniannianyingfuqi/logs/nohup-recommend.log
														
 
															-检测进程
														
 
															-ps aux | grep run_suisuiniannianyingfuqi
														
 
															-ps aux | grep run_suisuiniannianyingfuqi | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-```
														
 
															-
														
 
															 #### 线下爬虫: 刚刚都传 / 吉祥幸福 / 知青天天看 / 众妙音信 / wechat_search_key
														
 
															 ```commandline
														
 
															 MacAir 设备, crontab定时任务
														
@@ -228,29 +112,6 @@ ps aux | grep shipinhao_search
 
															 ps aux | grep shipinhao_search | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															 ```
														
 
															-#### 爬虫进程监控: main/process.sh
														
 
															-```commandline
														
 
															-102 服务器: 
														
 
															-* * * * * /usr/bin/sh /data5/piaoquan_crawler/main/process.sh "prod"  >>/data5/piaoquan_crawler/main/main_logs/run-process.log 2>&1
														
 
															-线下调试: 
														
 
															-sh main/process.sh "dev" >> main/main_logs/run-process.log 2>&1
														
 
															-进程监控
														
 
															-ps aux | grep search_key_mac | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep gongzhonghao | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep xiaoniangao | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep run_xigua_search | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep run_suisuiniannianyingfuqi | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep run_benshanzhufu | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep run_kuaishou | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep run_gongzhonghao | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep run_shipinhao | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep Appium.app | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep kuaishou | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep xigua_search | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep kanyikan | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-ps aux | grep shipinhao_search | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															-```
														
 
															-
														
 
															 #### 调用MQ的爬虫进程守护: main/process_mq.sh
														
 
															 ```commandline
														
--- a/benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py
+++ b/benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py
@@ -1,28 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/4/13
														
 
															-import argparse
														
 
															-import os
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from benshanzhufu.benshanzhufu_recommend.benshanzhufu_recommend import BenshanzhufuRecommend
														
 
															-
														
 
															-def main(log_type, crawler, env):
														
 
															-    if env == "dev":
														
 
															-        oss_endpoint = "out"
														
 
															-    else:
														
 
															-        oss_endpoint = "inner"
														
 
															-    Common.logger(log_type, crawler).info('开始抓取 本山祝福小程序\n')
														
 
															-    BenshanzhufuRecommend.get_videoList(log_type, crawler, oss_endpoint, env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取完一轮，休眠 1 分钟\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type, crawler=args.crawler, env=args.env)
														
--- a/benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend_scheduling.py
+++ b/benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend_scheduling.py
@@ -1,48 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/4/13
														
 
															-import argparse
														
 
															-import os
														
 
															-import random
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.public import task_fun
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from benshanzhufu.benshanzhufu_recommend.benshanzhufu_recommend_scheduling import BenshanzhufuRecommend
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, task, env):
														
 
															-    task_dict = task_fun(task)['task_dict']
														
 
															-    rule_dict = task_fun(task)['rule_dict']
														
 
															-    task_id = task_dict['task_id']
														
 
															-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
														
 
															-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
														
 
															-    our_uid_list = []
														
 
															-    for user in user_list:
														
 
															-        our_uid_list.append(user["uid"])
														
 
															-    our_uid = random.choice(our_uid_list)
														
 
															-    Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
														
 
															-    Common.logger(log_type, crawler).info('开始抓取 本山祝福小程序\n')
														
 
															-    BenshanzhufuRecommend.get_videoList(log_type=log_type,
														
 
															-                                        crawler=crawler,
														
 
															-                                        our_uid=our_uid,
														
 
															-                                        rule_dict=rule_dict,
														
 
															-                                        env=env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取完一轮\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--task')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         task=args.task,
														
 
															-         env=args.env)
														
--- a/benshanzhufu/benshanzhufu_recommend/benshanzhufu_recommend.py
+++ b/benshanzhufu/benshanzhufu_recommend/benshanzhufu_recommend.py
@@ -1,272 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/4/13
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2022/4/25
														
 
															-import json
														
 
															-import os
														
 
															-import random
														
 
															-import shutil
														
 
															-import sys
														
 
															-import time
														
 
															-from hashlib import md5
														
 
															-from urllib import parse
														
 
															-import requests
														
 
															-import urllib3
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from common.feishu import Feishu
														
 
															-from common.publish import Publish
														
 
															-proxies = {"http": None, "https": None}
														
 
															-
														
 
															-
														
 
															-class BenshanzhufuRecommend:
														
 
															-    # 翻页参数
														
 
															-    visitor_key = ""
														
 
															-    page = 1
														
 
															-    platform = "本山祝福"
														
 
															-
														
 
															-    # 过滤词库
														
 
															-    @classmethod
														
 
															-    def benshanzhufu_config(cls, log_type, crawler, text, env):
														
 
															-        select_sql = f"""select * from crawler_config where source="benshanzhufu" """
														
 
															-        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
														
 
															-        title_list = []
														
 
															-        filter_list = []
														
 
															-        for content in contents:
														
 
															-            config = content['config']
														
 
															-            config_dict = eval(config)
														
 
															-            for k, v in config_dict.items():
														
 
															-                if k == "title":
														
 
															-                    title_list_config = v.split(",")
														
 
															-                    for title in title_list_config:
														
 
															-                        title_list.append(title)
														
 
															-                if k == "filter":
														
 
															-                    filter_list_config = v.split(",")
														
 
															-                    for filter_word in filter_list_config:
														
 
															-                        filter_list.append(filter_word)
														
 
															-        if text == "title":
														
 
															-            return title_list
														
 
															-        elif text == "filter":
														
 
															-            return filter_list
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_video(cls, log_type, crawler, video_id, env):
														
 
															-        sql = f""" select * from crawler_video where platform="本山祝福" and out_video_id="{video_id}"; """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    # 推荐列表获取视频
														
 
															-    @classmethod
														
 
															-    def get_videoList(cls, log_type, crawler, oss_endpoint, env):
														
 
															-        while True:
														
 
															-            now = int(time.time() * 1000)
														
 
															-            url = "https://bszf.wentingyou.cn/index.php/v111/index/index?parameter="
														
 
															-            header = {
														
 
															-                "content-time": str(now),
														
 
															-                # "visitorKey": "165086930003741",
														
 
															-                "chatKey": "wx0fb8149da961d3b0",
														
 
															-                "cache-time": str(now),
														
 
															-                "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
														
 
															-                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
														
 
															-                              "MicroMessenger/8.0.20(0x1800142d) NetType/WIFI Language/zh_CN",
														
 
															-                "Referer": "https://servicewechat.com/wx0fb8149da961d3b0/2/page-frame.html"
														
 
															-            }
														
 
															-            parameter = {
														
 
															-                "page": random.randint(1, 76),
														
 
															-                "ini_id": cls.visitor_key
														
 
															-            }
														
 
															-            params = parse.quote(json.dumps(parameter))
														
 
															-            url = url + str(params)
														
 
															-            # try:
														
 
															-            urllib3.disable_warnings()
														
 
															-            r = requests.get(headers=header, url=url, proxies=proxies, verify=False)
														
 
															-            if r.status_code != 200:
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.status_code}, {r.text}\n")
														
 
															-                cls.visitor_key = ""
														
 
															-                cls.page = 1
														
 
															-                return
														
 
															-            elif r.json()['message'] != "list success":
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.status_code}, {r.json()}\n")
														
 
															-                cls.visitor_key = ""
														
 
															-                cls.page = 1
														
 
															-                return
														
 
															-            elif "data" not in r.json():
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.status_code}, {r.json()}\n")
														
 
															-                cls.visitor_key = ""
														
 
															-                cls.page = 1
														
 
															-                return
														
 
															-            elif len(r.json()['data']["list"]) == 0:
														
 
															-                Common.logger(log_type, crawler).info(f"没有更多数据了~ {r.json()}\n")
														
 
															-                cls.visitor_key = ""
														
 
															-                cls.page = 1
														
 
															-                return
														
 
															-            else:
														
 
															-                # 翻页
														
 
															-                cls.visitor_key = r.json()["data"]["visitor_key"]
														
 
															-                cls.page += 1
														
 
															-                feeds = r.json()["data"]["list"]
														
 
															-                for i in range(len(feeds)):
														
 
															-                    video_title = feeds[i].get("title", "").strip().replace("\n", "")\
														
 
															-                            .replace("/", "").replace("本山祝福", "").replace(" ", "")\
														
 
															-                            .replace(" ", "").replace("&NBSP", "").replace("\r", "")\
														
 
															-                            .replace("#", "").replace(".", "。").replace("\\", "")\
														
 
															-                            .replace(":", "").replace("*", "").replace("？", "")\
														
 
															-                            .replace("?", "").replace('"', "").replace("<", "")\
														
 
															-                            .replace(">", "").replace("|", "").replace("'", "").replace('"', "")
														
 
															-                    video_id = str(feeds[i].get("nid", ""))
														
 
															-                    play_cnt = 0
														
 
															-                    comment_cnt = feeds[i].get("commentCount", 0)
														
 
															-                    share_cnt = 0
														
 
															-                    like_cnt = 0
														
 
															-                    publish_time_stamp = feeds[i].get("update_time", 0)
														
 
															-                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
														
 
															-                    user_name = "本山祝福"
														
 
															-                    user_id = "benshanzhufu"
														
 
															-                    cover_url = feeds[i].get("video_cover", "")
														
 
															-                    video_url = feeds[i].get("video_url", "")
														
 
															-                    if ".mp4" not in video_url:
														
 
															-                        video_url = ""
														
 
															-
														
 
															-                    video_dict = {
														
 
															-                        'video_title': video_title,
														
 
															-                        'video_id': video_id,
														
 
															-                        'play_cnt': play_cnt,
														
 
															-                        'comment_cnt': comment_cnt,
														
 
															-                        'like_cnt': like_cnt,
														
 
															-                        'share_cnt': share_cnt,
														
 
															-                        'publish_time_stamp': publish_time_stamp,
														
 
															-                        'publish_time_str': publish_time_str,
														
 
															-                        'user_name': user_name,
														
 
															-                        'user_id': user_id,
														
 
															-                        'avatar_url': cover_url,
														
 
															-                        'cover_url': cover_url,
														
 
															-                        'video_url': video_url,
														
 
															-                        'session': f"benshanzhufu-{int(time.time())}"
														
 
															-                    }
														
 
															-                    for k, v in video_dict.items():
														
 
															-                        Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-
														
 
															-                    # 过滤无效视频
														
 
															-                    if video_id == "" or cover_url == "" or video_url == "":
														
 
															-                        Common.logger(log_type, crawler).info("无效视频\n")
														
 
															-                    elif any(str(word) if str(word) in video_title else False for word in cls.benshanzhufu_config(log_type, crawler, "filter", env)) is True:
														
 
															-                        Common.logger(log_type, crawler).info('已中过滤词\n')
														
 
															-                    elif cls.repeat_video(log_type, crawler, video_id, env) != 0:
														
 
															-                        Common.logger(log_type, crawler).info('视频已下载\n')
														
 
															-                    else:
														
 
															-                        cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
														
 
															-            # except Exception as e:
														
 
															-            #     Common.logger(log_type, crawler).error(f"get_videoList异常:{e}\n")
														
 
															-
														
 
															-    # 下载 / 上传
														
 
															-    @classmethod
														
 
															-    def download_publish(cls, log_type, crawler, video_dict, oss_endpoint, env):
														
 
															-        # try:
														
 
															-        # 下载视频
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='video', title=video_dict['video_title'], url=video_dict['video_url'])
														
 
															-        ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
														
 
															-        if ffmpeg_dict is None:
														
 
															-            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
														
 
															-            shutil.rmtree(f"./{crawler}/videos/{md_title}/")
														
 
															-            Common.logger(log_type, crawler).info("视频size=0，删除成功\n")
														
 
															-            return
														
 
															-        video_dict["duration"] = ffmpeg_dict["duration"]
														
 
															-        video_dict["video_width"] = ffmpeg_dict["width"]
														
 
															-        video_dict["video_height"] = ffmpeg_dict["height"]
														
 
															-
														
 
															-        # 下载封面
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'], url=video_dict['cover_url'])
														
 
															-        # 保存视频信息至txt
														
 
															-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
														
 
															-
														
 
															-        # 上传视频
														
 
															-        Common.logger(log_type, crawler).info("开始上传视频...")
														
 
															-        our_video_id = Publish.upload_and_publish(log_type=log_type,
														
 
															-                                                  crawler=crawler,
														
 
															-                                                  strategy="推荐榜爬虫策略",
														
 
															-                                                  our_uid="recommend",
														
 
															-                                                  env=env,
														
 
															-                                                  oss_endpoint=oss_endpoint)
														
 
															-        if env == 'dev':
														
 
															-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        else:
														
 
															-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        Common.logger(log_type, crawler).info("视频上传完成")
														
 
															-
														
 
															-        if our_video_id is None:
														
 
															-            # 删除视频文件夹
														
 
															-            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-            return
														
 
															-
														
 
															-        # 视频写入飞书
														
 
															-        Feishu.insert_columns(log_type, crawler, "440018", "ROWS", 1, 2)
														
 
															-        upload_time = int(time.time())
														
 
															-        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
														
 
															-                   "推荐榜爬虫策略",
														
 
															-                   video_dict['video_id'],
														
 
															-                   video_dict['video_title'],
														
 
															-                   our_video_link,
														
 
															-                   video_dict['play_cnt'],
														
 
															-                   video_dict['comment_cnt'],
														
 
															-                   video_dict['like_cnt'],
														
 
															-                   video_dict['share_cnt'],
														
 
															-                   video_dict['duration'],
														
 
															-                   f"{video_dict['video_width']}*{video_dict['video_height']}",
														
 
															-                   video_dict['publish_time_str'],
														
 
															-                   video_dict['user_name'],
														
 
															-                   video_dict['user_id'],
														
 
															-                   video_dict['avatar_url'],
														
 
															-                   video_dict['cover_url'],
														
 
															-                   video_dict['video_url']]]
														
 
															-        time.sleep(0.5)
														
 
															-        Feishu.update_values(log_type, crawler, "440018", "E2:Z2", values)
														
 
															-        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
														
 
															-
														
 
															-        rule_dict = {}
														
 
															-        # 视频信息保存数据库
														
 
															-        insert_sql = f""" insert into crawler_video(video_id,
														
 
															-                                                out_user_id,
														
 
															-                                                platform,
														
 
															-                                                strategy,
														
 
															-                                                out_video_id,
														
 
															-                                                video_title,
														
 
															-                                                cover_url,
														
 
															-                                                video_url,
														
 
															-                                                duration,
														
 
															-                                                publish_time,
														
 
															-                                                play_cnt,
														
 
															-                                                crawler_rule,
														
 
															-                                                width,
														
 
															-                                                height)
														
 
															-                                                values({our_video_id},
														
 
															-                                                "{video_dict['user_id']}",
														
 
															-                                                "{cls.platform}",
														
 
															-                                                "推荐榜爬虫策略",
														
 
															-                                                "{video_dict['video_id']}",
														
 
															-                                                "{video_dict['video_title']}",
														
 
															-                                                "{video_dict['cover_url']}",
														
 
															-                                                "{video_dict['video_url']}",
														
 
															-                                                {int(video_dict['duration'])},
														
 
															-                                                "{video_dict['publish_time_str']}",
														
 
															-                                                {int(video_dict['play_cnt'])},
														
 
															-                                                '{json.dumps(rule_dict)}',
														
 
															-                                                {int(video_dict['video_width'])},
														
 
															-                                                {int(video_dict['video_height'])}) """
														
 
															-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
														
 
															-        Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
														
 
															-
														
 
															-        # except Exception as e:
														
 
															-        #     Common.logger(log_type, crawler).error(f"download_publish异常:{e}\n")
														
 
															-        #     # 删除视频文件夹
														
 
															-        #     shutil.rmtree(f"./{crawler}/videos/")
														
 
															-        #     return
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    BenshanzhufuRecommend.get_videoList("recommend", "benshanzhufu", "out", "dev")
														
 
															-
														
 
															-    pass
														
--- a/common/public.py
+++ b/common/public.py
@@ -232,11 +232,9 @@ def get_title_score(log_type, crawler, stop_sheet, score_sheet, title):
 
															                     stop_word_list.append(y)
														
 
															         break
														
 
															-    # 将文本分词
														
 
															-    cut_list = jieba.lcut(title)
														
 
															-
														
 
															-    # 生成分词列表
														
 
															+    # 文本分词
														
 
															     cut_word_list = []
														
 
															+    cut_list = jieba.lcut(title)
														
 
															     for cut_item in cut_list:
														
 
															         if cut_item == " ":
														
 
															             continue
														
--- a/kuaishou/kuaishou_follow/__init__.py
+++ b/kuaishou/kuaishou_follow/__init__.py
@@ -1,3 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/23
														
--- a/kuaishou/kuaishou_follow/kuaishou_follow.py
+++ b/kuaishou/kuaishou_follow/kuaishou_follow.py
@@ -1,659 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/24
														
 
															-import os
														
 
															-# import random
														
 
															-import shutil
														
 
															-# import string
														
 
															-import sys
														
 
															-import time
														
 
															-from hashlib import md5
														
 
															-
														
 
															-import requests
														
 
															-import json
														
 
															-
														
 
															-import urllib3
														
 
															-from requests.adapters import HTTPAdapter
														
 
															-
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-from common.getuser import getUser
														
 
															-from common.db import MysqlHelper
														
 
															-from common.publish import Publish
														
 
															-from common.public import random_title, get_config_from_mysql
														
 
															-from common.public import get_user_from_mysql
														
 
															-
														
 
															-
														
 
															-class KuaiShouFollow:
														
 
															-    platform = "快手"
														
 
															-    tag = "快手爬虫,定向爬虫策略"
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_rule(cls, log_type, crawler, index):
														
 
															-        try:
														
 
															-            rule_sheet = Feishu.get_values_batch(log_type, crawler, "3iqG4z")
														
 
															-            if index == 1:
														
 
															-                rule_dict = {
														
 
															-                    "play_cnt": f"{rule_sheet[1][1]}{rule_sheet[1][2]}",
														
 
															-                    "video_width": f"{rule_sheet[2][1]}{rule_sheet[2][2]}",
														
 
															-                    "video_height": f"{rule_sheet[3][1]}{rule_sheet[3][2]}",
														
 
															-                    "like_cnt": f"{rule_sheet[4][1]}{rule_sheet[4][2]}",
														
 
															-                    "duration": f"{rule_sheet[5][1]}{rule_sheet[5][2]}",
														
 
															-                    "download_cnt": f"{rule_sheet[6][1]}{rule_sheet[6][2]}",
														
 
															-                    "publish_time": f"{rule_sheet[7][1]}{rule_sheet[7][2]}",
														
 
															-                }
														
 
															-                # for k, v in rule_dict.items():
														
 
															-                #     Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-                return rule_dict
														
 
															-            elif index == 2:
														
 
															-                rule_dict = {
														
 
															-                    "play_cnt": f"{rule_sheet[9][1]}{rule_sheet[9][2]}",
														
 
															-                    "video_width": f"{rule_sheet[10][1]}{rule_sheet[10][2]}",
														
 
															-                    "video_height": f"{rule_sheet[11][1]}{rule_sheet[11][2]}",
														
 
															-                    "like_cnt": f"{rule_sheet[12][1]}{rule_sheet[12][2]}",
														
 
															-                    "duration": f"{rule_sheet[13][1]}{rule_sheet[13][2]}",
														
 
															-                    "download_cnt": f"{rule_sheet[14][1]}{rule_sheet[14][2]}",
														
 
															-                    "publish_time": f"{rule_sheet[15][1]}{rule_sheet[15][2]}",
														
 
															-                }
														
 
															-                # for k, v in rule_dict.items():
														
 
															-                #     Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-                return rule_dict
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_rule:{e}\n")
														
 
															-
														
 
															-    @classmethod
														
 
															-    def download_rule(cls, video_dict, rule_dict):
														
 
															-        if eval(f"{video_dict['play_cnt']}{rule_dict['play_cnt']}") is True \
														
 
															-                and eval(f"{video_dict['video_width']}{rule_dict['video_width']}") is True \
														
 
															-                and eval(f"{video_dict['video_height']}{rule_dict['video_height']}") is True \
														
 
															-                and eval(f"{video_dict['like_cnt']}{rule_dict['like_cnt']}") is True \
														
 
															-                and eval(f"{video_dict['duration']}{rule_dict['duration']}") is True \
														
 
															-                and eval(f"{video_dict['publish_time']}{rule_dict['publish_time']}") is True:
														
 
															-            return True
														
 
															-        else:
														
 
															-            return False
														
 
															-
														
 
															-    # 过滤词库
														
 
															-    @classmethod
														
 
															-    def filter_words(cls, log_type, crawler):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'HIKVvs')
														
 
															-                if filter_words_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
														
 
															-                    continue
														
 
															-                filter_words_list = []
														
 
															-                for x in filter_words_sheet:
														
 
															-                    for y in x:
														
 
															-                        if y is None:
														
 
															-                            pass
														
 
															-                        else:
														
 
															-                            filter_words_list.append(y)
														
 
															-                return filter_words_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
														
 
															-
														
 
															-    # 获取站外用户信息
														
 
															-    @classmethod
														
 
															-    def get_out_user_info(cls, log_type, crawler, out_uid):
														
 
															-        try:
														
 
															-            url = "https://www.kuaishou.com/graphql"
														
 
															-
														
 
															-            payload = json.dumps({
														
 
															-                "operationName": "visionProfile",
														
 
															-                "variables": {
														
 
															-                    "userId": out_uid
														
 
															-                },
														
 
															-                "query": "query visionProfile($userId: String) {\n  visionProfile(userId: $userId) {\n    result\n    hostName\n    userProfile {\n      ownerCount {\n        fan\n        photo\n        follow\n        photo_public\n        __typename\n      }\n      profile {\n        gender\n        user_name\n        user_id\n        headurl\n        user_text\n        user_profile_bg_url\n        __typename\n      }\n      isFollowing\n      __typename\n    }\n    __typename\n  }\n}\n"
														
 
															-            })
														
 
															-            # s = string.ascii_lowercase
														
 
															-            # r = random.choice(s)
														
 
															-            headers = {
														
 
															-                'Accept': '*/*',
														
 
															-                'Content-Type': 'application/json',
														
 
															-                'Origin': 'https://www.kuaishou.com',
														
 
															-                'Cookie': f'kpf=PC_WEB; clientid=3; did={cls.get_did(log_type, crawler)}; kpn=KUAISHOU_VISION',
														
 
															-                'Content-Length': '552',
														
 
															-                'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
														
 
															-                'Host': 'www.kuaishou.com',
														
 
															-                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
														
 
															-                'Referer': 'https://www.kuaishou.com/profile/{}'.format(out_uid),
														
 
															-                'Accept-Encoding': 'gzip, deflate, br',
														
 
															-                'Connection': 'keep-alive'
														
 
															-            }
														
 
															-            urllib3.disable_warnings()
														
 
															-            s = requests.session()
														
 
															-            # max_retries=3 重试3次
														
 
															-            s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-            s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-            response = s.post(url=url, headers=headers, data=payload, proxies=Common.tunnel_proxies(), verify=False,
														
 
															-                              timeout=5)
														
 
															-            response.close()
														
 
															-            # Common.logger(log_type, crawler).info(f"get_out_user_info_response:{response.text}")
														
 
															-            if response.status_code != 200:
														
 
															-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.text}\n")
														
 
															-                return
														
 
															-            elif 'data' not in response.json():
														
 
															-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.json()}\n")
														
 
															-                return
														
 
															-            elif 'visionProfile' not in response.json()['data']:
														
 
															-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.json()['data']}\n")
														
 
															-                return
														
 
															-            elif 'userProfile' not in response.json()['data']['visionProfile']:
														
 
															-                Common.logger(log_type, crawler).warning(
														
 
															-                    f"get_out_user_info_response:{response.json()['data']['visionProfile']['userProfile']}\n")
														
 
															-                return
														
 
															-            else:
														
 
															-                userProfile = response.json()['data']['visionProfile']['userProfile']
														
 
															-                # Common.logger(log_type, crawler).info(f"userProfile:{userProfile}")
														
 
															-
														
 
															-                try:
														
 
															-                    out_fans_str = str(userProfile['ownerCount']['fan'])
														
 
															-                except Exception:
														
 
															-                    out_fans_str = "0"
														
 
															-
														
 
															-                try:
														
 
															-                    out_follow_str = str(userProfile['ownerCount']['follow'])
														
 
															-                except Exception:
														
 
															-                    out_follow_str = "0"
														
 
															-
														
 
															-                try:
														
 
															-                    out_avatar_url = userProfile['profile']['headurl']
														
 
															-                except Exception:
														
 
															-                    out_avatar_url = ""
														
 
															-
														
 
															-                Common.logger(log_type, crawler).info(f"out_fans_str:{out_fans_str}")
														
 
															-                Common.logger(log_type, crawler).info(f"out_follow_str:{out_follow_str}")
														
 
															-                Common.logger(log_type, crawler).info(f"out_avatar_url:{out_avatar_url}")
														
 
															-
														
 
															-                if "万" in out_fans_str:
														
 
															-                    out_fans = int(float(out_fans_str.split("万")[0]) * 10000)
														
 
															-                else:
														
 
															-                    out_fans = int(out_fans_str.replace(",", ""))
														
 
															-                if "万" in out_follow_str:
														
 
															-                    out_follow = int(float(out_follow_str.split("万")[0]) * 10000)
														
 
															-                else:
														
 
															-                    out_follow = int(out_follow_str.replace(",", ""))
														
 
															-
														
 
															-                out_user_dict = {
														
 
															-                    "out_fans": out_fans,
														
 
															-                    "out_follow": out_follow,
														
 
															-                    "out_avatar_url": out_avatar_url
														
 
															-                }
														
 
															-                Common.logger(log_type, crawler).info(f"out_user_dict:{out_user_dict}")
														
 
															-                return out_user_dict
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_out_user_info:{e}\n")
														
 
															-
														
 
															-    # 获取用户信息列表
														
 
															-    @classmethod
														
 
															-    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
														
 
															-                if user_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
														
 
															-                    continue
														
 
															-                our_user_list = []
														
 
															-                for i in range(1, len(user_sheet)):
														
 
															-                    # for i in range(1, 2):
														
 
															-                    out_uid = user_sheet[i][2]
														
 
															-                    user_name = user_sheet[i][3]
														
 
															-                    our_uid = user_sheet[i][6]
														
 
															-                    our_user_link = user_sheet[i][7]
														
 
															-                    if out_uid is None or user_name is None:
														
 
															-                        Common.logger(log_type, crawler).info("空行\n")
														
 
															-                    else:
														
 
															-                        Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
														
 
															-                        if our_uid is None:
														
 
															-                            out_user_info = cls.get_out_user_info(log_type, crawler, out_uid)
														
 
															-                            out_user_dict = {
														
 
															-                                "out_uid": out_uid,
														
 
															-                                "user_name": user_name,
														
 
															-                                "out_avatar_url": out_user_info["out_avatar_url"],
														
 
															-                                "out_create_time": '',
														
 
															-                                "out_tag": '',
														
 
															-                                "out_play_cnt": 0,
														
 
															-                                "out_fans": out_user_info["out_fans"],
														
 
															-                                "out_follow": out_user_info["out_follow"],
														
 
															-                                "out_friend": 0,
														
 
															-                                "out_like": 0,
														
 
															-                                "platform": cls.platform,
														
 
															-                                "tag": cls.tag,
														
 
															-                            }
														
 
															-                            our_user_dict = getUser.create_user(log_type=log_type, crawler=crawler,
														
 
															-                                                                out_user_dict=out_user_dict, env=env, machine=machine)
														
 
															-                            our_uid = our_user_dict['our_uid']
														
 
															-                            our_user_link = our_user_dict['our_user_link']
														
 
															-                            Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
														
 
															-                                                 [[our_uid, our_user_link]])
														
 
															-                            Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功！\n')
														
 
															-                            our_user_list.append(our_user_dict)
														
 
															-                        else:
														
 
															-                            our_user_dict = {
														
 
															-                                'out_uid': out_uid,
														
 
															-                                'user_name': user_name,
														
 
															-                                'our_uid': our_uid,
														
 
															-                                'our_user_link': our_user_link,
														
 
															-                            }
														
 
															-                            our_user_list.append(our_user_dict)
														
 
															-                return our_user_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'get_user_list:{e}\n')
														
 
															-
														
 
															-    # 处理视频标题
														
 
															-    @classmethod
														
 
															-    def video_title(cls, log_type, crawler, env, title):
														
 
															-        title_split1 = title.split(" #")
														
 
															-        if title_split1[0] != "":
														
 
															-            title1 = title_split1[0]
														
 
															-        else:
														
 
															-            title1 = title_split1[-1]
														
 
															-
														
 
															-        title_split2 = title1.split(" #")
														
 
															-        if title_split2[0] != "":
														
 
															-            title2 = title_split2[0]
														
 
															-        else:
														
 
															-            title2 = title_split2[-1]
														
 
															-
														
 
															-        title_split3 = title2.split("@")
														
 
															-        if title_split3[0] != "":
														
 
															-            title3 = title_split3[0]
														
 
															-        else:
														
 
															-            title3 = title_split3[-1]
														
 
															-
														
 
															-        video_title = title3.strip().replace("\n", "") \
														
 
															-                          .replace("/", "").replace("快手", "").replace(" ", "") \
														
 
															-                          .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
														
 
															-                          .replace("#", "").replace(".", "。").replace("\\", "") \
														
 
															-                          .replace(":", "").replace("*", "").replace("？", "") \
														
 
															-                          .replace("?", "").replace('"', "").replace("<", "") \
														
 
															-                          .replace(">", "").replace("|", "").replace("@", "").replace('"', '').replace("'", '')[:40]
														
 
															-        if video_title.replace(" ", "") == "" or video_title == "。。。" or video_title == "...":
														
 
															-            return random_title(log_type, crawler, env, text='title')
														
 
															-        else:
														
 
															-            return video_title
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_did(cls, log_type, crawler):
														
 
															-        while True:
														
 
															-            did_sheet = Feishu.get_values_batch(log_type, crawler, "G7acT6")
														
 
															-            if did_sheet is None:
														
 
															-                Common.logger(log_type, crawler).warning(f"did_sheet:{did_sheet}")
														
 
															-                time.sleep(2)
														
 
															-                continue
														
 
															-            return did_sheet[0][1]
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine, pcursor=""):
														
 
															-        download_cnt_1, download_cnt_2 = 0, 0
														
 
															-        rule_dict_1 = cls.get_rule(log_type, crawler, 1)
														
 
															-        rule_dict_2 = cls.get_rule(log_type, crawler, 2)
														
 
															-        if rule_dict_1 is None or rule_dict_2 is None:
														
 
															-            Common.logger(log_type, crawler).warning(f"rule_dict is None")
														
 
															-            return
														
 
															-
														
 
															-        url = "https://www.kuaishou.com/graphql"
														
 
															-        payload = json.dumps({
														
 
															-            "operationName": "visionProfilePhotoList",
														
 
															-            "variables": {
														
 
															-                "userId": out_uid,
														
 
															-                "pcursor": "",
														
 
															-                "page": "profile"
														
 
															-            },
														
 
															-            "query": "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
														
 
															-        })
														
 
															-        headers = {
														
 
															-            'Accept': '*/*',
														
 
															-            'Content-Type': 'application/json',
														
 
															-            'Origin': 'https://www.kuaishou.com',
														
 
															-            'Cookie': f'kpf=PC_WEB; clientid=3; did={cls.get_did(log_type, crawler)}; kpn=KUAISHOU_VISION',
														
 
															-            'Content-Length': '1260',
														
 
															-            'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
														
 
															-            'Host': 'www.kuaishou.com',
														
 
															-            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
														
 
															-            'Referer': 'https://www.kuaishou.com/profile/{}'.format(out_uid),
														
 
															-            'Accept-Encoding': 'gzip, deflate, br',
														
 
															-            'Connection': 'keep-alive'
														
 
															-        }
														
 
															-        response = requests.post(url=url, headers=headers, data=payload, proxies=Common.tunnel_proxies(),
														
 
															-                                 verify=False, timeout=10)
														
 
															-        try:
														
 
															-            feeds = response.json()['data']['visionProfilePhotoList']['feeds']
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_videoList:{e},response:{response.text}")
														
 
															-            return
														
 
															-        if not feeds:
														
 
															-            Common.logger(log_type, crawler).info("没有更多视频啦 ~\n")
														
 
															-            return
														
 
															-        pcursor = response.json()['data']['visionProfilePhotoList']['pcursor']
														
 
															-        # Common.logger(log_type, crawler).info(f"feeds0: {feeds}\n")
														
 
															-        for i in range(len(feeds)):
														
 
															-            try:
														
 
															-                # video_title
														
 
															-                if 'caption' not in feeds[i]['photo']:
														
 
															-                    video_title = random_title(log_type, crawler, env, text='title')
														
 
															-                elif feeds[i]['photo']['caption'].strip() == "":
														
 
															-                    video_title = random_title(log_type, crawler, env, text='title')
														
 
															-                else:
														
 
															-                    video_title = cls.video_title(log_type, crawler, env, feeds[i]['photo']['caption'])
														
 
															-
														
 
															-                if 'videoResource' not in feeds[i]['photo'] \
														
 
															-                        and 'manifest' not in feeds[i]['photo'] \
														
 
															-                        and 'manifestH265' not in feeds[i]['photo']:
														
 
															-                    Common.logger(log_type, crawler).warning(f"get_videoList:{feeds[i]['photo']}\n")
														
 
															-                    break
														
 
															-                videoResource = feeds[i]['photo']['videoResource']
														
 
															-
														
 
															-                if 'h264' not in videoResource and 'hevc' not in videoResource:
														
 
															-                    Common.logger(log_type, crawler).warning(f"get_videoList:{videoResource}\n")
														
 
															-                    break
														
 
															-
														
 
															-                # video_id
														
 
															-                if 'h264' in videoResource and 'videoId' in videoResource['h264']:
														
 
															-                    video_id = videoResource['h264']['videoId']
														
 
															-                elif 'hevc' in videoResource and 'videoId' in videoResource['hevc']:
														
 
															-                    video_id = videoResource['hevc']['videoId']
														
 
															-                else:
														
 
															-                    video_id = ""
														
 
															-
														
 
															-                # play_cnt
														
 
															-                if 'viewCount' not in feeds[i]['photo']:
														
 
															-                    play_cnt = 0
														
 
															-                else:
														
 
															-                    play_cnt = int(feeds[i]['photo']['viewCount'])
														
 
															-
														
 
															-                # like_cnt
														
 
															-                if 'realLikeCount' not in feeds[i]['photo']:
														
 
															-                    like_cnt = 0
														
 
															-                else:
														
 
															-                    like_cnt = feeds[i]['photo']['realLikeCount']
														
 
															-
														
 
															-                # publish_time
														
 
															-                if 'timestamp' not in feeds[i]['photo']:
														
 
															-                    publish_time_stamp = 0
														
 
															-                    publish_time_str = ''
														
 
															-                    publish_time = 0
														
 
															-                else:
														
 
															-                    publish_time_stamp = int(int(feeds[i]['photo']['timestamp']) / 1000)
														
 
															-                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
														
 
															-                    publish_time = int((int(time.time()) - publish_time_stamp) / (3600 * 24))
														
 
															-
														
 
															-                # duration
														
 
															-                if 'duration' not in feeds[i]['photo']:
														
 
															-                    duration = 0
														
 
															-                else:
														
 
															-                    duration = int(int(feeds[i]['photo']['duration']) / 1000)
														
 
															-
														
 
															-                # video_width / video_height / video_url
														
 
															-                mapping = {}
														
 
															-                for item in ['width', 'height']:
														
 
															-                    try:
														
 
															-                        val = str(videoResource['h264']['adaptationSet'][0]['representation'][0][item])
														
 
															-                    except:
														
 
															-                        val = str(videoResource['hevc']['adaptationSet'][0]['representation'][0][item])
														
 
															-                    mapping[item] = val
														
 
															-                video_width = int(mapping['width']) if mapping['width'] else 0
														
 
															-                video_height = int(mapping['height']) if mapping['height'] else 0
														
 
															-                # cover_url
														
 
															-                if 'coverUrl' not in feeds[i]['photo']:
														
 
															-                    cover_url = ""
														
 
															-                else:
														
 
															-                    cover_url = feeds[i]['photo']['coverUrl']
														
 
															-
														
 
															-                # user_name / avatar_url
														
 
															-                user_name = feeds[i]['author']['name']
														
 
															-                avatar_url = feeds[i]['author']['headerUrl']
														
 
															-
														
 
															-                video_url = feeds[i]['photo']['photoUrl']
														
 
															-                video_dict = {'video_title': video_title,
														
 
															-                              'video_id': video_id,
														
 
															-                              'play_cnt': play_cnt,
														
 
															-                              'comment_cnt': 0,
														
 
															-                              'like_cnt': like_cnt,
														
 
															-                              'share_cnt': 0,
														
 
															-                              'video_width': video_width,
														
 
															-                              'video_height': video_height,
														
 
															-                              'duration': duration,
														
 
															-                              'publish_time': publish_time,
														
 
															-                              'publish_time_stamp': publish_time_stamp,
														
 
															-                              'publish_time_str': publish_time_str,
														
 
															-                              'user_name': user_name,
														
 
															-                              'user_id': out_uid,
														
 
															-                              'avatar_url': avatar_url,
														
 
															-                              'cover_url': cover_url,
														
 
															-                              'video_url': video_url,
														
 
															-                              'session': f"kuaishou{int(time.time())}"}
														
 
															-
														
 
															-                rule_1 = cls.download_rule(video_dict, rule_dict_1)
														
 
															-                Common.logger(log_type, crawler).info(f"video_title:{video_title}")
														
 
															-                Common.logger(log_type, crawler).info(f"video_id:{video_id}\n")
														
 
															-
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"play_cnt:{video_dict['play_cnt']}{rule_dict_1['play_cnt']}, {eval(str(video_dict['play_cnt']) + str(rule_dict_1['play_cnt']))}")
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"like_cnt:{video_dict['like_cnt']}{rule_dict_1['like_cnt']}, {eval(str(video_dict['like_cnt']) + str(rule_dict_1['like_cnt']))}")
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"video_width:{video_dict['video_width']}{rule_dict_1['video_width']}, {eval(str(video_dict['video_width']) + str(rule_dict_1['video_width']))}")
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"video_height:{video_dict['video_height']}{rule_dict_1['video_height']}, {eval(str(video_dict['video_height']) + str(rule_dict_1['video_height']))}")
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"duration:{video_dict['duration']}{rule_dict_1['duration']}, {eval(str(video_dict['duration']) + str(rule_dict_1['duration']))}")
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"publish_time:{video_dict['publish_time']}{rule_dict_1['publish_time']}, {eval(str(video_dict['publish_time']) + str(rule_dict_1['publish_time']))}")
														
 
															-                Common.logger(log_type, crawler).info(f"rule_1:{rule_1}\n")
														
 
															-
														
 
															-                rule_2 = cls.download_rule(video_dict, rule_dict_2)
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"play_cnt:{video_dict['play_cnt']}{rule_dict_2['play_cnt']}, {eval(str(video_dict['play_cnt']) + str(rule_dict_2['play_cnt']))}")
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"like_cnt:{video_dict['like_cnt']}{rule_dict_2['like_cnt']}, {eval(str(video_dict['like_cnt']) + str(rule_dict_2['like_cnt']))}")
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"video_width:{video_dict['video_width']}{rule_dict_2['video_width']}, {eval(str(video_dict['video_width']) + str(rule_dict_2['video_width']))}")
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"video_height:{video_dict['video_height']}{rule_dict_2['video_height']}, {eval(str(video_dict['video_height']) + str(rule_dict_2['video_height']))}")
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"duration:{video_dict['duration']}{rule_dict_2['duration']}, {eval(str(video_dict['duration']) + str(rule_dict_2['duration']))}")
														
 
															-                Common.logger(log_type, crawler).info(
														
 
															-                    f"publish_time:{video_dict['publish_time']}{rule_dict_2['publish_time']}, {eval(str(video_dict['publish_time']) + str(rule_dict_2['publish_time']))}")
														
 
															-                Common.logger(log_type, crawler).info(f"rule_2:{rule_2}\n")
														
 
															-
														
 
															-                if video_title == "" or video_url == "":
														
 
															-                    Common.logger(log_type, crawler).info("无效视频\n")
														
 
															-                    continue
														
 
															-                elif rule_1 is True:
														
 
															-                    if download_cnt_1 < int(
														
 
															-                            rule_dict_1['download_cnt'].replace("=", "")[-1].replace("<", "")[-1].replace(">",
														
 
															-                                                                                                          "")[
														
 
															-                                -1]):
														
 
															-                        cls.download_publish(log_type=log_type,
														
 
															-                                             crawler=crawler,
														
 
															-                                             strategy=strategy,
														
 
															-                                             video_dict=video_dict,
														
 
															-                                             rule_dict=rule_dict_1,
														
 
															-                                             our_uid=our_uid,
														
 
															-                                             oss_endpoint=oss_endpoint,
														
 
															-                                             env=env,
														
 
															-                                             machine=machine)
														
 
															-                        # if download_finished is True:
														
 
															-                        #     download_cnt_1 += 1
														
 
															-                elif rule_2 is True:
														
 
															-                    if download_cnt_2 < int(
														
 
															-                            rule_dict_2['download_cnt'].replace("=", "")[-1].replace("<", "")[-1].replace(">",
														
 
															-                                                                                                          "")[
														
 
															-                                -1]):
														
 
															-                        cls.download_publish(log_type=log_type,
														
 
															-                                             crawler=crawler,
														
 
															-                                             strategy=strategy,
														
 
															-                                             video_dict=video_dict,
														
 
															-                                             rule_dict=rule_dict_2,
														
 
															-                                             our_uid=our_uid,
														
 
															-                                             oss_endpoint=oss_endpoint,
														
 
															-                                             env=env,
														
 
															-                                             machine=machine)
														
 
															-                        # if download_finished is True:
														
 
															-                        #     download_cnt_2 += 1
														
 
															-                else:
														
 
															-                    Common.logger(log_type, crawler).info("不满足下载规则\n")
														
 
															-                    # Common.logger(log_type, crawler).info(f"feeds: {feeds}\n")
														
 
															-            except Exception as e:
														
 
															-                Common.logger(log_type, crawler).warning(f"抓取单条视频异常:{e}\n")
														
 
															-
														
 
															-            # if pcursor == "no_more":
														
 
															-            #     Common.logger(log_type, crawler).info(f"作者,{out_uid},已经到底了，没有更多内容了\n")
														
 
															-            #     return
														
 
															-            # cls.get_videoList(log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine,
														
 
															-            #               pcursor=pcursor)
														
 
															-            # time.sleep(random.randint(1, 3))
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_video(cls, log_type, crawler, video_id, video_title, publish_time, env, machine):
														
 
															-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}" or (platform="{cls.platform}" and video_title="{video_title}" and publish_time="{publish_time}") """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    @classmethod
														
 
															-    def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
														
 
															-        filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
														
 
															-        for filter_word in filter_words:
														
 
															-            if filter_word in video_dict['video_title']:
														
 
															-                Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
														
 
															-                return
														
 
															-        download_finished = False
														
 
															-        if cls.repeat_video(log_type, crawler, video_dict['video_id'], video_dict['video_title'],
														
 
															-                            video_dict['publish_time_str'], env, machine) != 0:
														
 
															-            Common.logger(log_type, crawler).info('视频已下载\n')
														
 
															-        else:
														
 
															-            # 下载视频
														
 
															-            Common.download_method(log_type=log_type, crawler=crawler, text='video',
														
 
															-                                   title=video_dict['video_title'], url=video_dict['video_url'])
														
 
															-            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
														
 
															-            try:
														
 
															-                if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
														
 
															-                    # 删除视频文件夹
														
 
															-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-                    Common.logger(log_type, crawler).info("视频size=0，删除成功\n")
														
 
															-                    return
														
 
															-            except FileNotFoundError:
														
 
															-                # 删除视频文件夹
														
 
															-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-                Common.logger(log_type, crawler).info("未发现视频文件，删除成功\n")
														
 
															-                return
														
 
															-
														
 
															-            # 下载封面
														
 
															-            Common.download_method(log_type=log_type, crawler=crawler, text='cover',
														
 
															-                                   title=video_dict['video_title'], url=video_dict['cover_url'])
														
 
															-            # 保存视频信息至txt
														
 
															-            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
														
 
															-
														
 
															-            # 上传视频
														
 
															-            Common.logger(log_type, crawler).info("开始上传视频...")
														
 
															-            our_video_id = Publish.upload_and_publish(log_type=log_type,
														
 
															-                                                      crawler=crawler,
														
 
															-                                                      strategy=strategy,
														
 
															-                                                      our_uid=our_uid,
														
 
															-                                                      env=env,
														
 
															-                                                      oss_endpoint=oss_endpoint)
														
 
															-            if env == 'dev':
														
 
															-                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-            else:
														
 
															-                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-            Common.logger(log_type, crawler).info("视频上传完成")
														
 
															-
														
 
															-            if our_video_id is None:
														
 
															-                try:
														
 
															-                    Common.logger(log_type, crawler).warning(f"our_video_id:{our_video_id} 删除该视频文件夹")
														
 
															-                    # 删除视频文件夹
														
 
															-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-                    return download_finished
														
 
															-                except FileNotFoundError:
														
 
															-                    return download_finished
														
 
															-
														
 
															-            # 视频信息保存数据库
														
 
															-            insert_sql = f""" insert into crawler_video(video_id,
														
 
															-                                                    user_id,
														
 
															-                                                    out_user_id,
														
 
															-                                                    platform,
														
 
															-                                                    strategy,
														
 
															-                                                    out_video_id,
														
 
															-                                                    video_title,
														
 
															-                                                    cover_url,
														
 
															-                                                    video_url,
														
 
															-                                                    duration,
														
 
															-                                                    publish_time,
														
 
															-                                                    play_cnt,
														
 
															-                                                    crawler_rule,
														
 
															-                                                    width,
														
 
															-                                                    height)
														
 
															-                                                    values({our_video_id},
														
 
															-                                                    {our_uid},
														
 
															-                                                    "{video_dict['user_id']}",
														
 
															-                                                    "{cls.platform}",
														
 
															-                                                    "定向爬虫策略",
														
 
															-                                                    "{video_dict['video_id']}",
														
 
															-                                                    "{video_dict['video_title']}",
														
 
															-                                                    "{video_dict['cover_url']}",
														
 
															-                                                    "{video_dict['video_url']}",
														
 
															-                                                    {int(video_dict['duration'])},
														
 
															-                                                    "{video_dict['publish_time_str']}",
														
 
															-                                                    {int(video_dict['play_cnt'])},
														
 
															-                                                    '{json.dumps(rule_dict)}',
														
 
															-                                                    {int(video_dict['video_width'])},
														
 
															-                                                    {int(video_dict['video_height'])}) """
														
 
															-            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-            MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
														
 
															-            Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
														
 
															-
														
 
															-            # 视频写入飞书
														
 
															-            Feishu.insert_columns(log_type, 'kuaishou', "fYdA8F", "ROWS", 1, 2)
														
 
															-            upload_time = int(time.time())
														
 
															-            values = [[our_video_id,
														
 
															-                       time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
														
 
															-                       "定向榜",
														
 
															-                       str(video_dict['video_id']),
														
 
															-                       video_dict['video_title'],
														
 
															-                       our_video_link,
														
 
															-                       video_dict['play_cnt'],
														
 
															-                       video_dict['comment_cnt'],
														
 
															-                       video_dict['like_cnt'],
														
 
															-                       video_dict['share_cnt'],
														
 
															-                       video_dict['duration'],
														
 
															-                       f"{video_dict['video_width']}*{video_dict['video_height']}",
														
 
															-                       video_dict['publish_time_str'],
														
 
															-                       video_dict['user_name'],
														
 
															-                       video_dict['user_id'],
														
 
															-                       video_dict['avatar_url'],
														
 
															-                       video_dict['cover_url'],
														
 
															-                       video_dict['video_url']]]
														
 
															-            time.sleep(1)
														
 
															-            Feishu.update_values(log_type, 'kuaishou', "fYdA8F", "E2:Z2", values)
														
 
															-            Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
														
 
															-            download_finished = True
														
 
															-        return download_finished
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
														
 
															-        user_list = get_user_from_mysql(log_type, crawler, crawler, env)
														
 
															-        for user in user_list:
														
 
															-            try:
														
 
															-                spider_link = user["link"]
														
 
															-                out_uid = spider_link.split('/')[-1]
														
 
															-                user_name = user["nick_name"]
														
 
															-                our_uid = user["uid"]
														
 
															-                Common.logger(log_type, crawler).info(f"开始抓取 {user_name} 用户主页视频\n")
														
 
															-                cls.get_videoList(log_type=log_type,
														
 
															-                                  crawler=crawler,
														
 
															-                                  strategy=strategy,
														
 
															-                                  our_uid=our_uid,
														
 
															-                                  out_uid=out_uid,
														
 
															-                                  oss_endpoint=oss_endpoint,
														
 
															-                                  env=env,
														
 
															-                                  machine=machine)
														
 
															-            except Exception as e:
														
 
															-                Common.logger(log_type, crawler).warning(f"抓取用户{user}时异常:{e}\n")
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    print(KuaiShouFollow.get_did("follow", "kuaishou"))
														
 
															-    pass
														
--- a/kuaishou/kuaishou_main/run_ks_recommend.py
+++ b/kuaishou/kuaishou_main/run_ks_recommend.py
@@ -10,7 +10,7 @@ sys.path.append(os.getcwd())
 
															 from common.common import Common
														
 
															 from common.public import get_consumer, ack_message, task_fun_mq
														
 
															 from common.scheduling_db import MysqlHelper
														
 
															-from kuaishou.kuaishou_recommend.kuaishou_recommend_shceduling import KuaiShouRecommendScheduling
														
 
															+from kuaishou.kuaishou_recommend.kuaishou_recommend_cut_title import KuaiShouRecommendScheduling
														
 
															 def main(log_type, crawler, topic_name, group_id, env):
														
--- a/kuaishou/kuaishou_main/run_ks_recommend_dev.py
+++ b/kuaishou/kuaishou_main/run_ks_recommend_dev.py
@@ -13,7 +13,7 @@ def kuaishou_recommend_main(log_type, crawler, env):
 
															     KuaiShouRecommendScheduling.get_videoList(log_type=log_type,
														
 
															                                               crawler=crawler,
														
 
															                                               our_uid=6267140,
														
 
															-                                              rule_dict={"play_cnt":{"min":100000,"max":0},"like_cnt":{"min":80000,"max":0},"duration":{"min":50,"max":0},"period":{"min":30,"max":30}},
														
 
															+                                              rule_dict={"play_cnt":{"min":10000,"max":0},"like_cnt":{"min":8000,"max":0},"duration":{"min":50,"max":0},"period":{"min":30,"max":30}},
														
 
															                                               env=env)
														
 
															     Common.del_logs(log_type, crawler)
														
 
															     Common.logger(log_type, crawler).info("抓取一轮结束\n")
														
--- a/kuaishou/kuaishou_recommend/recommend_kuaishou.py
+++ b/kuaishou/kuaishou_recommend/recommend_kuaishou.py
@@ -1,501 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/24
														
 
															-import os
														
 
															-import random
														
 
															-import shutil
														
 
															-import sys
														
 
															-import time
														
 
															-import string
														
 
															-from hashlib import md5
														
 
															-
														
 
															-import requests
														
 
															-import json
														
 
															-
														
 
															-import urllib3
														
 
															-from requests.adapters import HTTPAdapter
														
 
															-
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-from common.getuser import getUser
														
 
															-from common.db import MysqlHelper
														
 
															-from common.publish import Publish
														
 
															-from common.public import get_user_from_mysql, random_title, get_config_from_mysql
														
 
															-from common.userAgent import get_random_user_agent
														
 
															-
														
 
															-
														
 
															-class KuaiShouRecommend:
														
 
															-    platform = "快手"
														
 
															-    tag = "快手爬虫,推荐爬虫策略"
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_rule(cls, log_type, crawler):
														
 
															-        try:
														
 
															-            rule_sheet = Feishu.get_values_batch(log_type, crawler, "NQ6CZN")
														
 
															-            rule_dict = {
														
 
															-                "play_cnt": f"{rule_sheet[0][1]}{rule_sheet[0][2]}",
														
 
															-                "video_width": f"{rule_sheet[1][1]}{rule_sheet[1][2]}",
														
 
															-                "video_height": f"{rule_sheet[2][1]}{rule_sheet[2][2]}",
														
 
															-                "like_cnt": f"{rule_sheet[5][1]}{rule_sheet[5][2]}",
														
 
															-                "duration": f"{rule_sheet[3][1]}{rule_sheet[3][2]}",
														
 
															-                "publish_time": f"{rule_sheet[4][1]}{rule_sheet[4][2]}",
														
 
															-            }
														
 
															-            return rule_dict
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_rule:{e}\n")
														
 
															-
														
 
															-    @classmethod
														
 
															-    def download_rule(cls, video_dict, rule_dict):
														
 
															-        if eval(f"{video_dict['video_width']}{rule_dict['video_width']}") is True \
														
 
															-                and eval(f"{video_dict['play_cnt']}{rule_dict['play_cnt']}") is True \
														
 
															-                and eval(f"{video_dict['video_height']}{rule_dict['video_height']}") is True \
														
 
															-                and eval(f"{video_dict['like_cnt']}{rule_dict['like_cnt']}") is True \
														
 
															-                and eval(f"{video_dict['duration']}{rule_dict['duration']}") is True \
														
 
															-                and eval(f"{video_dict['publish_time']}{rule_dict['publish_time']}") is True:
														
 
															-            return True
														
 
															-        else:
														
 
															-            return False
														
 
															-
														
 
															-    # 过滤词库
														
 
															-    @classmethod
														
 
															-    def filter_words(cls, log_type, crawler):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'HIKVvs')
														
 
															-                if filter_words_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
														
 
															-                    continue
														
 
															-                filter_words_list = []
														
 
															-                for x in filter_words_sheet:
														
 
															-                    for y in x:
														
 
															-                        if y is None:
														
 
															-                            pass
														
 
															-                        else:
														
 
															-                            filter_words_list.append(y)
														
 
															-                return filter_words_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
														
 
															-
														
 
															-    # 获取用户信息列表
														
 
															-    @classmethod
														
 
															-    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
														
 
															-                if user_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
														
 
															-                    continue
														
 
															-                our_user_list = []
														
 
															-                for i in range(1, len(user_sheet)):
														
 
															-                    # for i in range(1, 2):
														
 
															-                    out_uid = user_sheet[i][2]
														
 
															-                    user_name = user_sheet[i][3]
														
 
															-                    our_uid = user_sheet[i][6]
														
 
															-                    our_user_link = user_sheet[i][7]
														
 
															-                    if out_uid is None or user_name is None:
														
 
															-                        Common.logger(log_type, crawler).info("空行\n")
														
 
															-                    else:
														
 
															-                        Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
														
 
															-                        if our_uid is None:
														
 
															-                            out_user_info = cls.get_out_user_info(log_type, crawler, out_uid)
														
 
															-                            out_user_dict = {
														
 
															-                                "out_uid": out_uid,
														
 
															-                                "user_name": user_name,
														
 
															-                                "out_avatar_url": out_user_info["out_avatar_url"],
														
 
															-                                "out_create_time": '',
														
 
															-                                "out_tag": '',
														
 
															-                                "out_play_cnt": 0,
														
 
															-                                "out_fans": out_user_info["out_fans"],
														
 
															-                                "out_follow": out_user_info["out_follow"],
														
 
															-                                "out_friend": 0,
														
 
															-                                "out_like": 0,
														
 
															-                                "platform": cls.platform,
														
 
															-                                "tag": cls.tag,
														
 
															-                            }
														
 
															-                            our_user_dict = getUser.create_user(log_type=log_type, crawler=crawler,
														
 
															-                                                                out_user_dict=out_user_dict, env=env, machine=machine)
														
 
															-                            our_uid = our_user_dict['our_uid']
														
 
															-                            our_user_link = our_user_dict['our_user_link']
														
 
															-                            Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
														
 
															-                                                 [[our_uid, our_user_link]])
														
 
															-                            Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功！\n')
														
 
															-                            our_user_list.append(our_user_dict)
														
 
															-                        else:
														
 
															-                            our_user_dict = {
														
 
															-                                'out_uid': out_uid,
														
 
															-                                'user_name': user_name,
														
 
															-                                'our_uid': our_uid,
														
 
															-                                'our_user_link': our_user_link,
														
 
															-                            }
														
 
															-                            our_user_list.append(our_user_dict)
														
 
															-                return our_user_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'get_user_list:{e}\n')
														
 
															-
														
 
															-    # 处理视频标题
														
 
															-    @classmethod
														
 
															-    def video_title(cls, log_type, crawler, env, title):
														
 
															-        title_split1 = title.split(" #")
														
 
															-        if title_split1[0] != "":
														
 
															-            title1 = title_split1[0]
														
 
															-        else:
														
 
															-            title1 = title_split1[-1]
														
 
															-
														
 
															-        title_split2 = title1.split(" #")
														
 
															-        if title_split2[0] != "":
														
 
															-            title2 = title_split2[0]
														
 
															-        else:
														
 
															-            title2 = title_split2[-1]
														
 
															-
														
 
															-        title_split3 = title2.split("@")
														
 
															-        if title_split3[0] != "":
														
 
															-            title3 = title_split3[0]
														
 
															-        else:
														
 
															-            title3 = title_split3[-1]
														
 
															-
														
 
															-        video_title = title3.strip().replace("\n", "") \
														
 
															-                          .replace("/", "").replace("快手", "").replace(" ", "") \
														
 
															-                          .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
														
 
															-                          .replace("#", "").replace(".", "。").replace("\\", "") \
														
 
															-                          .replace(":", "").replace("*", "").replace("？", "") \
														
 
															-                          .replace("?", "").replace('"', "").replace("<", "") \
														
 
															-                          .replace(">", "").replace("|", "").replace("@", "").replace('"', '').replace("'", '')[:40]
														
 
															-        if video_title.replace(" ", "") == "" or video_title == "。。。" or video_title == "...":
														
 
															-            return random_title(log_type, crawler, env, text='title')
														
 
															-        else:
														
 
															-            return video_title
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_videoList(cls, log_type, crawler, strategy, our_uid, oss_endpoint, env, machine):
														
 
															-        rule_dict_1 = cls.get_rule(log_type, crawler)
														
 
															-        if rule_dict_1 is None:
														
 
															-            Common.logger(log_type, crawler).warning(f"rule_dict is None")
														
 
															-            return
														
 
															-
														
 
															-        for i in range(100):
														
 
															-            url = "https://www.kuaishou.com/graphql"
														
 
															-
														
 
															-            payload = json.dumps({
														
 
															-                "operationName": "visionNewRecoFeed",
														
 
															-                "variables": {
														
 
															-                    "dailyFirstPage": False
														
 
															-                },
														
 
															-                "query": "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nfragment photoResult on PhotoResult {\n  result\n  llsid\n  expTag\n  serverExpTag\n  pcursor\n  feeds {\n    ...feedContent\n    __typename\n  }\n  webPageArea\n  __typename\n}\n\nquery visionNewRecoFeed($semKeyword: String, $semCrowd: String, $utmSource: String, $utmMedium: String, $utmCampaign: String, $dailyFirstPage: Boolean) {\n  visionNewRecoFeed(semKeyword: $semKeyword, semCrowd: $semCrowd, utmSource: $utmSource, utmMedium: $utmMedium, utmCampaign: $utmCampaign, dailyFirstPage: $dailyFirstPage) {\n    ...photoResult\n    __typename\n  }\n}\n"
														
 
															-            })
														
 
															-            s = string.ascii_lowercase
														
 
															-            r = random.choice(s)
														
 
															-
														
 
															-            headers = {
														
 
															-                'Accept-Language': 'zh-CN,zh;q=0.9',
														
 
															-                'Connection': 'keep-alive',
														
 
															-                'Cookie': 'kpf=PC_WEB; clientid=3; did=web_7cdc486ebd1aba220455a7781d6ae5b5{r}7; kpn=KUAISHOU_VISION;'.format(
														
 
															-                    r=r),
														
 
															-                'Origin': 'https://www.kuaishou.com',
														
 
															-                'Referer': 'https://www.kuaishou.com/new-reco',
														
 
															-                'Sec-Fetch-Dest': 'empty',
														
 
															-                'Sec-Fetch-Mode': 'cors',
														
 
															-                'Sec-Fetch-Site': 'same-origin',
														
 
															-                'User-Agent': get_random_user_agent('pc'),
														
 
															-                'accept': '*/*',
														
 
															-                'content-type': 'application/json',
														
 
															-                'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
														
 
															-                'sec-ch-ua-mobile': '?0',
														
 
															-                'sec-ch-ua-platform': '"macOS"'
														
 
															-            }
														
 
															-
														
 
															-            try:
														
 
															-                urllib3.disable_warnings()
														
 
															-                s = requests.session()
														
 
															-                # max_retries=3 重试3次
														
 
															-                s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-                s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-                response = s.post(url=url, headers=headers, data=payload, proxies=Common.tunnel_proxies(), verify=False,
														
 
															-                                  timeout=10)
														
 
															-                response.close()
														
 
															-            except Exception as e:
														
 
															-                Common.logger(log_type, crawler).error(f"get_videoList:{e}\n")
														
 
															-                continue
														
 
															-            # Common.logger(log_type, crawler).info(f"get_videoList:{response.text}\n")
														
 
															-            if response.status_code != 200:
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videoList_response:{response.text}\n")
														
 
															-                continue
														
 
															-            elif 'data' not in response.json():
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videoList_response:{response.json()}\n")
														
 
															-                continue
														
 
															-            elif 'visionNewRecoFeed' not in response.json()['data']:
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videoList_response:{response.json()['data']}\n")
														
 
															-                continue
														
 
															-            elif 'feeds' not in response.json()['data']['visionNewRecoFeed']:
														
 
															-                Common.logger(log_type, crawler).warning(
														
 
															-                    f"get_videoList_response:{response.json()['data']['visionNewRecoFeed']}\n")
														
 
															-                continue
														
 
															-            elif len(response.json()['data']['visionNewRecoFeed']['feeds']) == 0:
														
 
															-                Common.logger(log_type, crawler).info("没有更多视频啦 ~\n")
														
 
															-                continue
														
 
															-            else:
														
 
															-                feeds = response.json()['data']['visionNewRecoFeed']['feeds']
														
 
															-                for i in range(len(feeds)):
														
 
															-                    if 'photo' not in feeds[i]:
														
 
															-                        Common.logger(log_type, crawler).warning(f"get_videoList:{feeds[i]}\n")
														
 
															-                        continue
														
 
															-
														
 
															-                    # video_title
														
 
															-                    if 'caption' not in feeds[i]['photo']:
														
 
															-                        video_title = random_title(log_type, crawler, env, text='title')
														
 
															-
														
 
															-                    elif feeds[i]['photo']['caption'].strip() == "":
														
 
															-                        video_title = random_title(log_type, crawler, env, text='title')
														
 
															-                    else:
														
 
															-                        video_title = cls.video_title(log_type, crawler, env, feeds[i]['photo']['caption'])
														
 
															-
														
 
															-                    if 'videoResource' not in feeds[i]['photo'] \
														
 
															-                            and 'manifest' not in feeds[i]['photo'] \
														
 
															-                            and 'manifestH265' not in feeds[i]['photo']:
														
 
															-                        Common.logger(log_type, crawler).warning(f"get_videoList:{feeds[i]['photo']}\n")
														
 
															-                        continue
														
 
															-                    videoResource = feeds[i]['photo']['videoResource']
														
 
															-
														
 
															-                    if 'h264' not in videoResource and 'hevc' not in videoResource:
														
 
															-                        Common.logger(log_type, crawler).warning(f"get_videoList:{videoResource}\n")
														
 
															-                        continue
														
 
															-
														
 
															-                    # video_id
														
 
															-                    if 'h264' in videoResource and 'videoId' in videoResource['h264']:
														
 
															-                        video_id = videoResource['h264']['videoId']
														
 
															-                    elif 'hevc' in videoResource and 'videoId' in videoResource['hevc']:
														
 
															-                        video_id = videoResource['hevc']['videoId']
														
 
															-                    else:
														
 
															-                        video_id = ""
														
 
															-
														
 
															-                    # play_cnt
														
 
															-                    if 'viewCount' not in feeds[i]['photo']:
														
 
															-                        play_cnt = 0
														
 
															-                    else:
														
 
															-                        play_cnt = int(feeds[i]['photo']['viewCount'])
														
 
															-
														
 
															-                    # like_cnt
														
 
															-                    if 'realLikeCount' not in feeds[i]['photo']:
														
 
															-                        like_cnt = 0
														
 
															-                    else:
														
 
															-                        like_cnt = feeds[i]['photo']['realLikeCount']
														
 
															-
														
 
															-                    # publish_time
														
 
															-                    if 'timestamp' not in feeds[i]['photo']:
														
 
															-                        publish_time_stamp = 0
														
 
															-                        publish_time_str = ''
														
 
															-                        publish_time = 0
														
 
															-                    else:
														
 
															-                        publish_time_stamp = int(int(feeds[i]['photo']['timestamp']) / 1000)
														
 
															-                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
														
 
															-                        publish_time = int((int(time.time()) - publish_time_stamp) / (3600 * 24))
														
 
															-
														
 
															-                    # duration
														
 
															-                    if 'duration' not in feeds[i]['photo']:
														
 
															-                        duration = 0
														
 
															-                    else:
														
 
															-                        duration = int(int(feeds[i]['photo']['duration']) / 1000)
														
 
															-
														
 
															-                    # video_width / video_height / video_url
														
 
															-                    mapping = {}
														
 
															-                    for item in ['width', 'height']:
														
 
															-                        try:
														
 
															-                            val = str(videoResource['h264']['adaptationSet'][0]['representation'][0][item])
														
 
															-                        except Exception:
														
 
															-                            val = str(videoResource['hevc']['adaptationSet'][0]['representation'][0][item])
														
 
															-                        except:
														
 
															-                            val = ''
														
 
															-                        mapping[item] = val
														
 
															-                    video_width = int(mapping['width']) if mapping['width'] != '' else 0
														
 
															-                    video_height = int(mapping['height']) if mapping['height'] != '' else 0
														
 
															-                    # cover_url
														
 
															-                    if 'coverUrl' not in feeds[i]['photo']:
														
 
															-                        cover_url = ""
														
 
															-                    else:
														
 
															-                        cover_url = feeds[i]['photo']['coverUrl']
														
 
															-
														
 
															-                    # user_name / avatar_url
														
 
															-                    try:
														
 
															-                        user_name = feeds[i]['author']['name']
														
 
															-                        avatar_url = feeds[i]['author']['headerUrl']
														
 
															-                        user_id = feeds[i]['author']['id']
														
 
															-                    except Exception:
														
 
															-                        user_name = ''
														
 
															-                        avatar_url = ''
														
 
															-                        user_id = ''
														
 
															-                    video_url = feeds[i]['photo']['photoUrl']
														
 
															-                    video_dict = {'video_title': video_title,
														
 
															-                                  'video_id': video_id,
														
 
															-                                  'play_cnt': play_cnt,
														
 
															-                                  'comment_cnt': 0,
														
 
															-                                  'like_cnt': like_cnt,
														
 
															-                                  'share_cnt': 0,
														
 
															-                                  'video_width': video_width,
														
 
															-                                  'video_height': video_height,
														
 
															-                                  'duration': duration,
														
 
															-                                  'publish_time': publish_time,
														
 
															-                                  'publish_time_stamp': publish_time_stamp,
														
 
															-                                  'publish_time_str': publish_time_str,
														
 
															-                                  'user_name': user_name,
														
 
															-                                  'user_id': user_id,
														
 
															-                                  'avatar_url': avatar_url,
														
 
															-                                  'cover_url': cover_url,
														
 
															-                                  'video_url': video_url,
														
 
															-                                  'session': f"kuaishou{int(time.time())}"}
														
 
															-
														
 
															-                    rule_1 = cls.download_rule(video_dict, rule_dict_1)
														
 
															-                    Common.logger(log_type, crawler).info(f"video_title:{video_title}")
														
 
															-                    Common.logger(log_type, crawler).info(f"video_id:{video_id}\n")
														
 
															-
														
 
															-                    Common.logger(log_type, crawler).info(
														
 
															-                        f"play_cnt:{video_dict['play_cnt']}{rule_dict_1['play_cnt']}, {eval(str(video_dict['play_cnt']) + str(rule_dict_1['play_cnt']))}")
														
 
															-                    Common.logger(log_type, crawler).info(
														
 
															-                        f"like_cnt:{video_dict['like_cnt']}{rule_dict_1['like_cnt']}, {eval(str(video_dict['like_cnt']) + str(rule_dict_1['like_cnt']))}")
														
 
															-                    Common.logger(log_type, crawler).info(
														
 
															-                        f"video_width:{video_dict['video_width']}{rule_dict_1['video_width']}, {eval(str(video_dict['video_width']) + str(rule_dict_1['video_width']))}")
														
 
															-                    Common.logger(log_type, crawler).info(
														
 
															-                        f"video_height:{video_dict['video_height']}{rule_dict_1['video_height']}, {eval(str(video_dict['video_height']) + str(rule_dict_1['video_height']))}")
														
 
															-                    Common.logger(log_type, crawler).info(
														
 
															-                        f"duration:{video_dict['duration']}{rule_dict_1['duration']}, {eval(str(video_dict['duration']) + str(rule_dict_1['duration']))}")
														
 
															-                    Common.logger(log_type, crawler).info(
														
 
															-                        f"publish_time:{video_dict['publish_time']}{rule_dict_1['publish_time']}, {eval(str(video_dict['publish_time']) + str(rule_dict_1['publish_time']))}")
														
 
															-                    Common.logger(log_type, crawler).info(f"rule_1:{rule_1}\n")
														
 
															-
														
 
															-                    if video_title == "" or video_url == "":
														
 
															-                        Common.logger(log_type, crawler).info("无效视频\n")
														
 
															-                        continue
														
 
															-                    elif rule_1 is True:
														
 
															-                        cls.download_publish(log_type=log_type,
														
 
															-                                             crawler=crawler,
														
 
															-                                             strategy=strategy,
														
 
															-                                             video_dict=video_dict,
														
 
															-                                             rule_dict=rule_dict_1,
														
 
															-                                             our_uid=our_uid,
														
 
															-                                             oss_endpoint=oss_endpoint,
														
 
															-                                             env=env,
														
 
															-                                             machine=machine)
														
 
															-
														
 
															-                    else:
														
 
															-                        Common.logger(log_type, crawler).info("不满足下载规则\n")
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_video(cls, log_type, crawler, video_id, video_title, publish_time, env, machine):
														
 
															-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}" or (platform="{cls.platform}" and video_title="{video_title}" and publish_time="{publish_time}") """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    @classmethod
														
 
															-    def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
														
 
															-        try:
														
 
															-            filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
														
 
															-            for filter_word in filter_words:
														
 
															-                if filter_word in video_dict['video_title']:
														
 
															-                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
														
 
															-                    return
														
 
															-            download_finished = False
														
 
															-            if cls.repeat_video(log_type, crawler, video_dict['video_id'], video_dict['video_title'],
														
 
															-                                video_dict['publish_time_str'], env, machine) != 0:
														
 
															-                Common.logger(log_type, crawler).info('视频已下载\n')
														
 
															-            else:
														
 
															-                # 下载视频
														
 
															-                Common.download_method(log_type=log_type, crawler=crawler, text='video',
														
 
															-                                       title=video_dict['video_title'], url=video_dict['video_url'])
														
 
															-                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
														
 
															-                if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
														
 
															-                    # 删除视频文件夹
														
 
															-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-                    Common.logger(log_type, crawler).info("视频size=0，删除成功\n")
														
 
															-                    return
														
 
															-                # 下载封面
														
 
															-                Common.download_method(log_type=log_type, crawler=crawler, text='cover',
														
 
															-                                       title=video_dict['video_title'], url=video_dict['cover_url'])
														
 
															-                # 保存视频信息至txt
														
 
															-                Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
														
 
															-
														
 
															-                # 上传视频
														
 
															-                Common.logger(log_type, crawler).info("开始上传视频...")
														
 
															-                our_video_id = Publish.upload_and_publish(log_type=log_type,
														
 
															-                                                          crawler=crawler,
														
 
															-                                                          strategy=strategy,
														
 
															-                                                          our_uid=our_uid,
														
 
															-                                                          env=env,
														
 
															-                                                          oss_endpoint=oss_endpoint)
														
 
															-                if env == 'dev':
														
 
															-                    our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-                else:
														
 
															-                    our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-                Common.logger(log_type, crawler).info("视频上传完成")
														
 
															-
														
 
															-                if our_video_id is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"our_video_id:{our_video_id} 删除该视频文件夹")
														
 
															-                    # 删除视频文件夹
														
 
															-                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-                    return download_finished
														
 
															-
														
 
															-                # 视频信息保存数据库
														
 
															-                insert_sql = f""" insert into crawler_video(video_id,
														
 
															-                                                        user_id,
														
 
															-                                                        out_user_id,
														
 
															-                                                        platform,
														
 
															-                                                        strategy,
														
 
															-                                                        out_video_id,
														
 
															-                                                        video_title,
														
 
															-                                                        cover_url,
														
 
															-                                                        video_url,
														
 
															-                                                        duration,
														
 
															-                                                        publish_time,
														
 
															-                                                        play_cnt,
														
 
															-                                                        crawler_rule,
														
 
															-                                                        width,
														
 
															-                                                        height)
														
 
															-                                                        values({our_video_id},
														
 
															-                                                        {our_uid},
														
 
															-                                                        "{video_dict['user_id']}",
														
 
															-                                                        "{cls.platform}",
														
 
															-                                                        "{strategy}",
														
 
															-                                                        "{video_dict['video_id']}",
														
 
															-                                                        "{video_dict['video_title']}",
														
 
															-                                                        "{video_dict['cover_url']}",
														
 
															-                                                        "{video_dict['video_url']}",
														
 
															-                                                        {int(video_dict['duration'])},
														
 
															-                                                        "{video_dict['publish_time_str']}",
														
 
															-                                                        {int(video_dict['play_cnt'])},
														
 
															-                                                        '{json.dumps(rule_dict)}',
														
 
															-                                                        {int(video_dict['video_width'])},
														
 
															-                                                        {int(video_dict['video_height'])}) """
														
 
															-                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-                MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
														
 
															-                Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
														
 
															-
														
 
															-                # 视频写入飞书
														
 
															-                Feishu.insert_columns(log_type, 'kuaishou', "Aps2BI", "ROWS", 1, 2)
														
 
															-                upload_time = int(time.time())
														
 
															-                values = [[our_video_id,
														
 
															-                           time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
														
 
															-                           strategy,
														
 
															-                           str(video_dict['video_id']),
														
 
															-                           video_dict['video_title'],
														
 
															-                           our_video_link,
														
 
															-                           video_dict['play_cnt'],
														
 
															-                           video_dict['comment_cnt'],
														
 
															-                           video_dict['like_cnt'],
														
 
															-                           video_dict['share_cnt'],
														
 
															-                           video_dict['duration'],
														
 
															-                           f"{video_dict['video_width']}*{video_dict['video_height']}",
														
 
															-                           video_dict['publish_time_str'],
														
 
															-                           video_dict['user_name'],
														
 
															-                           video_dict['user_id'],
														
 
															-                           video_dict['avatar_url'],
														
 
															-                           video_dict['cover_url'],
														
 
															-                           video_dict['video_url']]]
														
 
															-                time.sleep(1)
														
 
															-                Feishu.update_values(log_type, 'kuaishou', "Aps2BI", "E2:Z2", values)
														
 
															-                Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
														
 
															-                download_finished = True
														
 
															-            return download_finished
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"download_publish:{e}\n")
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    KuaiShouRecommend.get_videoList('recommend', 'kuaishou', '推荐抓取策略', 55440319, 'outer', 'prod', 'aliyun')
														
--- a/main/process.sh
+++ b/main/process.sh
@@ -24,190 +24,6 @@ echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量..." >> ${log_path}
 
															 cd ~ && source /etc/profile
														
 
															 echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成！" >> ${log_path}
														
 
															-## 公众号爬虫策略
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略 1-100个账号 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_gongzhonghao_follow.py" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="author" --crawler="gongzhonghao" --env="dev" gongzhonghao/logs/nohup-follow.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="author" --crawler="gongzhonghao" --env="prod"  gongzhonghao/logs/nohup-follow.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略1-100个账号 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-#
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略101-145个账号 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_gongzhonghao_follow_2.py" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="dev" gongzhonghao/logs/nohup-follow-2.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/logs/nohup-follow-2.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略101-145个账号 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															-## 小年糕定向爬虫策略
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 小年糕定向爬虫策略 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_xiaoniangao_follow.py" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="author" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-follow.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="author" --crawler="xiaoniangao" --env="prod"  xiaoniangao/logs/nohup-follow.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕定向爬虫策略 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															-## 小年糕小时榜爬虫策略
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 小年糕小时榜爬虫策略 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_xiaoniangao_hour.py" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-hour.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="prod" xiaoniangao/logs/nohup-hour.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕小时榜爬虫策略 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															-## 小年糕播放量榜爬虫策略
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 播放量榜爬虫策略 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_xiaoniangao_play.py" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-play.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="prod" xiaoniangao/logs/nohup-play.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 播放量榜爬虫策略 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															-
														
 
															-## 快手定向爬虫策略
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 快手定向爬虫策略 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_kuaishou_follow.py" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="author" --crawler="kuaishou" --env="dev" kuaishou/logs/nohup-follow.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="author" --crawler="kuaishou" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/logs/nohup-follow.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 快手定向爬虫策略 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															-## 快手推荐爬虫策略
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 快手推荐爬虫策略 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_kuaishou_recommend.py" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="author" --crawler="kuaishou" --env="dev" kuaishou/logs/nohup-recommend.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_recommend.py --log_type="recommend" --crawler="kuaishou" --strategy="推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/logs/nohup-recommend.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 快手推荐爬虫策略 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															-## 抖音推荐爬虫策略
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 抖音推荐爬虫策略 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_douyin_recommend.py" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="author" --crawler="kuaishou" --env="dev" douyin/logs/nohup-recommend.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./douyin/douyin_main/run_douyin_recommend.py --log_type="recommend" --crawler="douyin" --strategy="抖音推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" douyin/logs/nohup-recommend.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 抖音推荐爬虫策略 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															-## 抖音定向爬虫策略
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 抖音定向爬虫策略 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_douyin_follow.py" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/douyin_main/run_douyin_follow.py --log_type="author" --crawler="douyin" --env="dev" douyin/logs/nohup-follow.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./douyin/douyin_main/run_douyin_follow.py --log_type="author" --crawler="douyin" --strategy="抖音定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" douyin/logs/nohup-author.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 抖音推荐爬虫策略 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															-## 西瓜定向爬虫策略
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 西瓜定向爬虫策略 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_xigua_follow.py" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="author" --crawler="xigua" --env="dev" xigua/logs/nohup-follow.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="author" --crawler="xigua" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" xigua/logs/nohup-follow.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 西瓜定向爬虫策略 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															-## 西瓜推荐榜爬虫策略
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 西瓜推荐榜爬虫策略 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_xigua_recommend.py" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_recommend.py --log_type="recommend" --crawler="xigua" --env="dev" xigua/logs/nohup-recommend.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_recommend.py --log_type="recommend" --crawler="xigua" --env="prod" xigua/logs/nohup-recommend.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 西瓜推荐榜爬虫策略 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															-## 西瓜搜索爬虫策略
														
 
															-#if [[ "$time" > "00:00:00" ]] && [[ "$time" < "00:10:00" ]]; then
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 西瓜搜索爬虫策略 进程状态" >> ${log_path}
														
 
															-#  ps -ef | grep "run_xigua_search_new" | grep -v "grep"
														
 
															-#  if [ "$?" -eq 1 ];then
														
 
															-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 西瓜搜索爬虫策略, 异常停止, 正在重启!" >> ${log_path}
														
 
															-#    if [ ${env} = "dev" ];then
														
 
															-#      cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_search_new.py --log_type="search" --crawler="xigua" --env="dev" xigua/logs/nohup-search.log
														
 
															-#    else
														
 
															-#      cd ${piaoquan_crawler_dir} && /usr/bin/sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_search_new.py --log_type="search" --crawler="xigua" --env="prod" xigua/logs/nohup-search.log
														
 
															-#    fi
														
 
															-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#  else
														
 
															-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 西瓜搜索爬虫策略 进程状态正常" >> ${log_path}
														
 
															-#  fi
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 不在任务启动时间范围: 西瓜搜索爬虫" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															 # youtube定向爬虫策略
														
 
															 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 youtube定向爬虫策略 进程状态" >> ${log_path}
														
 
															 ps -ef | grep "run_youtube_follow.py" | grep -v "grep"
														
@@ -223,35 +39,6 @@ else
 
															   echo "$(date "+%Y-%m-%d %H:%M:%S") youtube定向爬虫策略 进程状态正常" >> ${log_path}
														
 
															 fi
														
 
															-## 本山祝福小程序爬虫
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 本山祝福小程序爬虫 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_benshanzhufu" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py --log_type="recommend" --crawler="benshanzhufu" --env="dev" benshanzhufu/logs/nohup-recommend.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py --log_type="recommend" --crawler="benshanzhufu" --env="prod"  benshanzhufu/logs/nohup-recommend.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 本山祝福小程序爬虫 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															-
														
 
															-## 岁岁年年迎福气小程序爬虫
														
 
															-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 岁岁年年迎福气小程序爬虫 进程状态" >> ${log_path}
														
 
															-#ps -ef | grep "run_suisuiniannianyingfuqi" | grep -v "grep"
														
 
															-#if [ "$?" -eq 1 ];then
														
 
															-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
														
 
															-#  if [ ${env} = "dev" ];then
														
 
															-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py --log_type="recommend" --crawler="suisuiniannianyingfuqi" --env="dev" suisuiniannianyingfuqi/logs/nohup-recommend.log
														
 
															-#  else
														
 
															-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py --log_type="recommend" --crawler="suisuiniannianyingfuqi" --env="prod"  suisuiniannianyingfuqi/logs/nohup-recommend.log
														
 
															-#  fi
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
														
 
															-#else
														
 
															-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 岁岁年年迎福气小程序爬虫 进程状态正常" >> ${log_path}
														
 
															-#fi
														
 
															 # 微信指数监控
														
 
															 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 微信指数 bot 爬虫 进程状态" >> ${log_path}
														
--- a/main/process_mq.sh
+++ b/main/process_mq.sh
@@ -37,7 +37,6 @@ cd ${piaoquan_crawler_dir} && git pull origin master --force
 
															 echo "$(date "+%Y-%m-%d %H:%M:%S") 代码更新完成！" >> ${log_path}
														
 
															 # ====================接入爬虫平台，且调用MQ进程检测====================
														
 
															-# 岁岁年年迎福气
														
 
															 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 ${crawler}_${log_type} 进程状态" >> ${log_path}
														
 
															 ps -ef | grep "run_${crawler}_${log_type}.py" | grep -v "grep"
														
 
															 if [ "$?" -eq 1 ];then
														
--- a/suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py
+++ b/suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py
@@ -1,24 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/4/13
														
 
															-import argparse
														
 
															-import os
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from suisuiniannianyingfuqi.suisuiniannianyingfuqi_recommend.suisuiniannianyingfuqi_recommend import SuisuiniannianyingfuqiRecommend
														
 
															-
														
 
															-def main(log_type, crawler, env):
														
 
															-    Common.logger(log_type, crawler).info('开始抓取 岁岁年年迎福气小程序\n')
														
 
															-    SuisuiniannianyingfuqiRecommend.get_videoList(log_type, crawler, env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取完一轮\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type, crawler=args.crawler, env=args.env)
														
--- a/suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend_scheduling.py
+++ b/suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend_scheduling.py
@@ -1,49 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/4/13
														
 
															-import argparse
														
 
															-import os
														
 
															-import random
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.public import task_fun
														
 
															-from common.common import Common
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from suisuiniannianyingfuqi.suisuiniannianyingfuqi_recommend.suisuiniannianyingfuqi_recommend_scheduling import SuisuiniannianyingfuqiRecommendScheduling
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, task, env):
														
 
															-    task_dict = task_fun(task)['task_dict']
														
 
															-    rule_dict = task_fun(task)['rule_dict']
														
 
															-    task_id = task_dict['task_id']
														
 
															-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
														
 
															-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
														
 
															-    our_uid_list = []
														
 
															-    for user in user_list:
														
 
															-        our_uid_list.append(user["uid"])
														
 
															-    our_uid = random.choice(our_uid_list)
														
 
															-    Common.logger(log_type, crawler).info(f"调度任务:\n{task_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f"抓取规则:\n{rule_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f"用户列表:\n{user_list}")
														
 
															-    Common.logger(log_type, crawler).info('开始抓取 岁岁年年迎福气小程序\n')
														
 
															-    SuisuiniannianyingfuqiRecommendScheduling.get_videoList(log_type=log_type,
														
 
															-                                                            crawler=crawler,
														
 
															-                                                            our_uid=our_uid,
														
 
															-                                                            rule_dict=rule_dict,
														
 
															-                                                            env=env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取完一轮\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--task')  ## 添加参数
														
 
															-    # parser.add_argument('--oss_endpoint')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         task=args.task,
														
 
															-         env=args.env)
														
--- a/suisuiniannianyingfuqi/suisuiniannianyingfuqi_recommend/suisuiniannianyingfuqi_recommend.py
+++ b/suisuiniannianyingfuqi/suisuiniannianyingfuqi_recommend/suisuiniannianyingfuqi_recommend.py
@@ -1,214 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/4/13
														
 
															-import json
														
 
															-import os
														
 
															-import random
														
 
															-import shutil
														
 
															-import sys
														
 
															-import time
														
 
															-from hashlib import md5
														
 
															-import requests
														
 
															-import urllib3
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-from common.publish import Publish
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-# from common.public import download_rule
														
 
															-
														
 
															-
														
 
															-class SuisuiniannianyingfuqiRecommend:
														
 
															-    platform = "岁岁年年迎福气"
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_video(cls, log_type, crawler, video_id, env):
														
 
															-        sql = f""" select * from crawler_video where platform="岁岁年年迎福气" and out_video_id="{video_id}"; """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_videoList(cls, log_type, crawler, env):
														
 
															-        page = 1
														
 
															-        while True:
														
 
															-            try:
														
 
															-                url = 'https://www.jzkksp.com/index/home/get_home_list.html'
														
 
															-                headers = {
														
 
															-                    'content-type': 'application/x-www-form-urlencoded',
														
 
															-                    'Accept-Encoding': 'gzip,compress,br,deflate',
														
 
															-                    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) '
														
 
															-                                  'AppleWebKit/605.1.15 (KHTML, like Gecko) '
														
 
															-                                  'Mobile/15E148 MicroMessenger/8.0.25(0x1800192b) NetType/WIFI Language/zh_CN',
														
 
															-                    'Referer': 'https://servicewechat.com/wxd4c54f60812f6f36/1/page-frame.html',
														
 
															-                }
														
 
															-                data = {
														
 
															-                    'token': '851ae159fd33f955bf433e7c47a4a298',
														
 
															-                    'time': '1667905857000',
														
 
															-                    'str_data': 'uT551tU8',
														
 
															-                    'page': str(page),
														
 
															-                    'limit': '10',
														
 
															-                    'appid': 'wxd4c54f60812f6f36',
														
 
															-                    'version': '1.4.1',
														
 
															-                    'openid': 'oDAjy5SCFe7Ml3PNgiow3ncozL1o'
														
 
															-                }
														
 
															-                urllib3.disable_warnings()
														
 
															-                response = requests.post(url=url, headers=headers, data=data, verify=False)
														
 
															-                page += 1
														
 
															-                if response.status_code != 200:
														
 
															-                    Common.logger(log_type, crawler).warning(f'get_videoList:{response.status_code}, {response.text}\n')
														
 
															-                    return
														
 
															-                elif 'data' not in response.json():
														
 
															-                    Common.logger(log_type, crawler).warning(f'get_videoList:{response.status_code}, {response.json()}\n')
														
 
															-                    return
														
 
															-                elif len(response.json()['data']['video_list']['data']) == 0:
														
 
															-                    Common.logger(log_type, crawler).info(f'没有更多数据啦~ {response.json()}\n')
														
 
															-                    return
														
 
															-                else:
														
 
															-                    feeds = response.json()['data']['video_list']['data']
														
 
															-                    for i in range(len(feeds)):
														
 
															-                        try:
														
 
															-                            publish_time_str = feeds[i].get('createtime', '')
														
 
															-                            publish_time_stamp = int(time.mktime(time.strptime(publish_time_str, "%Y-%m-%d")))
														
 
															-                            video_dict = {'video_title': feeds[i].get('title', "").replace("'", "").replace('"', ''),
														
 
															-                                          'video_id': str(feeds[i].get('id', '')),
														
 
															-                                          'play_cnt': feeds[i].get('browse', 0),
														
 
															-                                          'comment_cnt': 0,
														
 
															-                                          'like_cnt': 0,
														
 
															-                                          'share_cnt': 0,
														
 
															-                                          'publish_time_stamp': publish_time_stamp,
														
 
															-                                          'publish_time_str': publish_time_str,
														
 
															-                                          'user_name': "岁岁年年迎福气",
														
 
															-                                          'user_id': "suisuiniannianyingfuqi",
														
 
															-                                          'avatar_url': feeds[i].get('thumb', ''),
														
 
															-                                          'cover_url': feeds[i].get('thumb', ''),
														
 
															-                                          'video_url': feeds[i].get('url', ''),
														
 
															-                                          'session': f"suisuiniannianyingfuqi-{int(time.time())}"}
														
 
															-                            for k, v in video_dict.items():
														
 
															-                                Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-
														
 
															-                            if video_dict["video_id"] == '' or video_dict["video_title"] == '' or video_dict["cover_url"] == '' or video_dict["video_url"] == '':
														
 
															-                                Common.logger(log_type, crawler).info('无效视频\n')
														
 
															-                            elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
														
 
															-                                Common.logger(log_type, crawler).info('视频已下载\n')
														
 
															-                            else:
														
 
															-                                cls.download_publish(log_type, crawler, video_dict, env)
														
 
															-                        except Exception as e:
														
 
															-                            Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
														
 
															-            except Exception as e:
														
 
															-                Common.logger(log_type, crawler).error(f"抓取第{page}页时异常:{e}\n")
														
 
															-
														
 
															-# 下载 / 上传
														
 
															-    @classmethod
														
 
															-    def download_publish(cls, log_type, crawler, video_dict, env):
														
 
															-        # 下载视频
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='video', title=video_dict['video_title'], url=video_dict['video_url'])
														
 
															-        md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
														
 
															-        try:
														
 
															-            if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
														
 
															-                # 删除视频文件夹
														
 
															-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-                Common.logger(log_type, crawler).info("视频size=0，删除成功\n")
														
 
															-                return
														
 
															-        except FileNotFoundError:
														
 
															-            # 删除视频文件夹
														
 
															-            shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-            Common.logger(log_type, crawler).info("视频文件不存在，删除文件夹成功\n")
														
 
															-            return
														
 
															-
														
 
															-        ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
														
 
															-        video_dict["duration"] = ffmpeg_dict["duration"]
														
 
															-        video_dict["video_width"] = ffmpeg_dict["width"]
														
 
															-        video_dict["video_height"] = ffmpeg_dict["height"]
														
 
															-
														
 
															-        # 下载封面
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'], url=video_dict['cover_url'])
														
 
															-        # 保存视频信息至txt
														
 
															-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
														
 
															-        if env == "dev":
														
 
															-            oss_endpoint = "out"
														
 
															-        else:
														
 
															-            oss_endpoint = "inner"
														
 
															-
														
 
															-        select_user_sql = f"""select * from crawler_user_v3 where source="suisuiniannianyingfuqi" """
														
 
															-        user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
														
 
															-        our_uid_list = []
														
 
															-        for user in user_list:
														
 
															-            our_uid_list.append(user["uid"])
														
 
															-        our_uid = random.choice(our_uid_list)
														
 
															-
														
 
															-        # 上传视频
														
 
															-        Common.logger(log_type, crawler).info("开始上传视频...")
														
 
															-        our_video_id = Publish.upload_and_publish(log_type=log_type,
														
 
															-                                                  crawler=crawler,
														
 
															-                                                  strategy="推荐榜爬虫策略",
														
 
															-                                                  our_uid=our_uid,
														
 
															-                                                  env=env,
														
 
															-                                                  oss_endpoint=oss_endpoint)
														
 
															-        if env == 'dev':
														
 
															-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        else:
														
 
															-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        Common.logger(log_type, crawler).info("视频上传完成")
														
 
															-
														
 
															-        if our_video_id is None:
														
 
															-            try:
														
 
															-                # 删除视频文件夹
														
 
															-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-                return
														
 
															-            except FileNotFoundError:
														
 
															-                return
														
 
															-
														
 
															-        # 视频写入飞书
														
 
															-        Feishu.insert_columns(log_type, crawler, "290bae", "ROWS", 1, 2)
														
 
															-        upload_time = int(time.time())
														
 
															-        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
														
 
															-                   "推荐榜爬虫策略",
														
 
															-                   video_dict['video_title'],
														
 
															-                   video_dict['video_id'],
														
 
															-                   our_video_link,
														
 
															-                   video_dict['play_cnt'],
														
 
															-                   video_dict['duration'],
														
 
															-                   f"{video_dict['video_width']}*{video_dict['video_height']}",
														
 
															-                   video_dict['cover_url'],
														
 
															-                   video_dict['video_url']]]
														
 
															-        time.sleep(0.5)
														
 
															-        Feishu.update_values(log_type, crawler, "290bae", "F2:Z2", values)
														
 
															-        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
														
 
															-
														
 
															-        rule_dict = {}
														
 
															-        # 视频信息保存数据库
														
 
															-        insert_sql = f""" insert into crawler_video(video_id,
														
 
															-                                                out_user_id,
														
 
															-                                                platform,
														
 
															-                                                strategy,
														
 
															-                                                out_video_id,
														
 
															-                                                video_title,
														
 
															-                                                cover_url,
														
 
															-                                                video_url,
														
 
															-                                                duration,
														
 
															-                                                publish_time,
														
 
															-                                                play_cnt,
														
 
															-                                                crawler_rule,
														
 
															-                                                width,
														
 
															-                                                height)
														
 
															-                                                values({our_video_id},
														
 
															-                                                "{video_dict['user_id']}",
														
 
															-                                                "{cls.platform}",
														
 
															-                                                "推荐榜爬虫策略",
														
 
															-                                                "{video_dict['video_id']}",
														
 
															-                                                "{video_dict['video_title']}",
														
 
															-                                                "{video_dict['cover_url']}",
														
 
															-                                                "{video_dict['video_url']}",
														
 
															-                                                {int(video_dict['duration'])},
														
 
															-                                                "{video_dict['publish_time_str']}",
														
 
															-                                                {int(video_dict['play_cnt'])},
														
 
															-                                                '{json.dumps(rule_dict)}',
														
 
															-                                                {int(video_dict['video_width'])},
														
 
															-                                                {int(video_dict['video_height'])}) """
														
 
															-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
														
 
															-        Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == '__main__':
														
 
															-    pass
														
--- a/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_long.py
+++ b/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_long.py
@@ -1,152 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/28
														
 
															-import json
														
 
															-import os
														
 
															-import sys
														
 
															-import time
														
 
															-from datetime import date, timedelta
														
 
															-import requests
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-proxies = {"http": None, "https": None}
														
 
															-
														
 
															-
														
 
															-class Test:
														
 
															-    # 获取微信 key / openid
														
 
															-    @classmethod
														
 
															-    def get_wechat_key(cls, log_type, crawler):
														
 
															-        """
														
 
															-        获取微信 key / openid
														
 
															-        https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
														
 
															-        :param log_type: 日志名
														
 
															-        :param crawler: 哪款爬虫，填写:weixinzhishu
														
 
															-        :return: search_key, openid
														
 
															-        """
														
 
															-        try:
														
 
															-            # while True:
														
 
															-            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
														
 
															-                # if sheet is None:
														
 
															-                #     Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ，10秒钟后重试")
														
 
															-                #     time.sleep(10)
														
 
															-                # else:
														
 
															-                #     break
														
 
															-            for i in range(len(sheet)):
														
 
															-                search_key = sheet[1][1]
														
 
															-                openid = sheet[1][2]
														
 
															-                return search_key, openid
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_words(cls, log_type, crawler):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                sheet = Feishu.get_values_batch(log_type, crawler, 'X6K0vN')
														
 
															-                if sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ，10秒钟后重试")
														
 
															-                    time.sleep(10)
														
 
															-                else:
														
 
															-                    break
														
 
															-            word_list = []
														
 
															-            for i in range(len(sheet)):
														
 
															-                word_dict = {
														
 
															-                    "title": sheet[i][0],
														
 
															-                    "word": sheet[i][1]
														
 
															-                }
														
 
															-                word_list.append(word_dict)
														
 
															-            return word_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_words:{e}\n")
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_score_test(cls, log_type, crawler):
														
 
															-
														
 
															-        start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
														
 
															-        end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
														
 
															-
														
 
															-        word_list = cls.get_words(log_type, crawler)
														
 
															-        for i in range(len(word_list)):
														
 
															-            Common.logger(log_type, crawler).info(f"热词: {word_list[i]['word']}")
														
 
															-            while True:
														
 
															-                wechat_key = cls.get_wechat_key(log_type, crawler)
														
 
															-                if wechat_key is None:
														
 
															-                    Common.logger(log_type, crawler).info(
														
 
															-                        f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} auth 过期，休眠 10 秒，重新获取")
														
 
															-                    time.sleep(10)
														
 
															-                    continue
														
 
															-
														
 
															-                search_key = wechat_key[0]
														
 
															-                openid = wechat_key[-1]
														
 
															-                url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
														
 
															-                payload = json.dumps({
														
 
															-                    "openid": openid,
														
 
															-                    "search_key": search_key,
														
 
															-                    "cgi_name": "GetDefaultIndex",
														
 
															-                    "start_ymd": start_ymd,
														
 
															-                    "end_ymd": end_ymd,
														
 
															-                    "query": word_list[i]['word']
														
 
															-                })
														
 
															-                headers = {
														
 
															-                    'Host': 'search.weixin.qq.com',
														
 
															-                    'content-type': 'application/json',
														
 
															-                    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
														
 
															-                    'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
														
 
															-                }
														
 
															-                response = requests.request("POST", url, headers=headers, data=payload, proxies=proxies)
														
 
															-                if response.json()['code'] == -10000:
														
 
															-                    Common.logger(log_type, crawler).info(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} response:{response.json()['msg']} 休眠 10 秒，重新获取")
														
 
															-                    time.sleep(10)
														
 
															-                    continue
														
 
															-
														
 
															-                wechat_score_list = []
														
 
															-                word_wechat_score_dict = {
														
 
															-                    "id": i+1,
														
 
															-                    "word": word_list[i]['word'],
														
 
															-                    "wechatScores": wechat_score_list,
														
 
															-                }
														
 
															-                if response.json()['code'] == -10002:
														
 
															-                    Common.logger(log_type, crawler).info("该词暂未收录")
														
 
															-                    # # 写飞书
														
 
															-                    # if word_list[i]['word'] in [x for y in Feishu.get_values_batch(log_type, crawler, "JpgyAv") for x in y]:
														
 
															-                    #     Common.logger(log_type, crawler).info("该词已存在")
														
 
															-                    #     continue
														
 
															-                    Feishu.insert_columns(log_type, crawler, "JpgyAv", "ROWS", 1, 2)
														
 
															-                    time.sleep(0.5)
														
 
															-                    Feishu.update_values(log_type, crawler, "JpgyAv", "F2:Z2",
														
 
															-                                         [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
														
 
															-                                           word_list[i]['title'],
														
 
															-                                           word_list[i]['word'],
														
 
															-                                           "",
														
 
															-                                           "该词暂未收录"]])
														
 
															-                    Common.logger(log_type, crawler).info("写入飞书成功\n")
														
 
															-                elif response.json()['code'] != 0:
														
 
															-                    Common.logger(log_type, crawler).warning(f"{word_wechat_score_dict}")
														
 
															-                    continue
														
 
															-                else:
														
 
															-                    time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
														
 
															-                    for x in range(len(time_index)):
														
 
															-                        Common.logger(log_type, crawler).info(f"正在更新 {word_list[i]['word']}")
														
 
															-                        score_time = time_index[x]['time']
														
 
															-                        score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
														
 
															-                        score = time_index[x]['score']
														
 
															-                        wechat_score_dict = {"score": score, "scoreDate": score_time_str}
														
 
															-                        wechat_score_list.append(wechat_score_dict)
														
 
															-                        Common.logger(log_type, crawler).info(f"wechat_score_dict:{wechat_score_dict}")
														
 
															-                        Feishu.insert_columns(log_type, crawler, "JpgyAv", "ROWS", 1, 2)
														
 
															-                        time.sleep(1)
														
 
															-                        Feishu.update_values(log_type, crawler, "JpgyAv", "F2:Z2", [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
														
 
															-                                                                               word_list[i]['title'],
														
 
															-                                                                               word_list[i]['word'],
														
 
															-                                                                               score_time_str,
														
 
															-                                                                               score]])
														
 
															-                        Common.logger(log_type, crawler).info("写入飞书成功\n")
														
 
															-                break
														
 
															-
														
 
															-        Feishu.bot(log_type, "weixinzhishu_inner_long", "微信指数_站内长期指数抓取完毕")
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    Test.get_score_test("inner-long", "weixinzhishu")
														
 
															-    pass
														
--- a/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_sort.py
+++ b/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_sort.py
@@ -1,152 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/28
														
 
															-import json
														
 
															-import os
														
 
															-import sys
														
 
															-import time
														
 
															-from datetime import date, timedelta
														
 
															-import requests
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-proxies = {"http": None, "https": None}
														
 
															-
														
 
															-
														
 
															-class Test:
														
 
															-    # 获取微信 key / openid
														
 
															-    @classmethod
														
 
															-    def get_wechat_key(cls, log_type, crawler):
														
 
															-        """
														
 
															-        获取微信 key / openid
														
 
															-        https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
														
 
															-        :param log_type: 日志名
														
 
															-        :param crawler: 哪款爬虫，填写:weixinzhishu
														
 
															-        :return: search_key, openid
														
 
															-        """
														
 
															-        try:
														
 
															-            # while True:
														
 
															-            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
														
 
															-                # if sheet is None:
														
 
															-                #     Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ，10秒钟后重试")
														
 
															-                #     time.sleep(10)
														
 
															-                # else:
														
 
															-                #     break
														
 
															-            for i in range(len(sheet)):
														
 
															-                search_key = sheet[1][1]
														
 
															-                openid = sheet[1][2]
														
 
															-                return search_key, openid
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_words(cls, log_type, crawler):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                sheet = Feishu.get_values_batch(log_type, crawler, 'D9IqTp')
														
 
															-                if sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ，10秒钟后重试")
														
 
															-                    time.sleep(10)
														
 
															-                else:
														
 
															-                    break
														
 
															-            word_list = []
														
 
															-            for i in range(len(sheet)):
														
 
															-                word_dict = {
														
 
															-                    "title": sheet[i][0],
														
 
															-                    "word": sheet[i][1]
														
 
															-                }
														
 
															-                word_list.append(word_dict)
														
 
															-            return word_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_words:{e}\n")
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_score_test(cls, log_type, crawler):
														
 
															-
														
 
															-        start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
														
 
															-        end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
														
 
															-
														
 
															-        word_list = cls.get_words(log_type, crawler)
														
 
															-        for i in range(len(word_list)):
														
 
															-            Common.logger(log_type, crawler).info(f"热词: {word_list[i]['word']}")
														
 
															-            while True:
														
 
															-                wechat_key = cls.get_wechat_key(log_type, crawler)
														
 
															-                if wechat_key is None:
														
 
															-                    Common.logger(log_type, crawler).info(
														
 
															-                        f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} auth 过期，休眠 10 秒，重新获取")
														
 
															-                    time.sleep(10)
														
 
															-                    continue
														
 
															-
														
 
															-                search_key = wechat_key[0]
														
 
															-                openid = wechat_key[-1]
														
 
															-                url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
														
 
															-                payload = json.dumps({
														
 
															-                    "openid": openid,
														
 
															-                    "search_key": search_key,
														
 
															-                    "cgi_name": "GetDefaultIndex",
														
 
															-                    "start_ymd": start_ymd,
														
 
															-                    "end_ymd": end_ymd,
														
 
															-                    "query": word_list[i]['word']
														
 
															-                })
														
 
															-                headers = {
														
 
															-                    'Host': 'search.weixin.qq.com',
														
 
															-                    'content-type': 'application/json',
														
 
															-                    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
														
 
															-                    'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
														
 
															-                }
														
 
															-                response = requests.request("POST", url, headers=headers, data=payload, proxies=proxies)
														
 
															-                if response.json()['code'] == -10000:
														
 
															-                    Common.logger(log_type, crawler).info(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} response:{response.json()['msg']} 休眠 10 秒，重新获取")
														
 
															-                    time.sleep(10)
														
 
															-                    continue
														
 
															-
														
 
															-                wechat_score_list = []
														
 
															-                word_wechat_score_dict = {
														
 
															-                    "id": i+1,
														
 
															-                    "word": word_list[i]['word'],
														
 
															-                    "wechatScores": wechat_score_list,
														
 
															-                }
														
 
															-                if response.json()['code'] == -10002:
														
 
															-                    Common.logger(log_type, crawler).info("该词暂未收录")
														
 
															-                    # # 写飞书
														
 
															-                    # if word_list[i]['word'] in [x for y in Feishu.get_values_batch(log_type, crawler, "DrZHpa") for x in y]:
														
 
															-                    #     Common.logger(log_type, crawler).info("该词已存在")
														
 
															-                    #     continue
														
 
															-                    Feishu.insert_columns(log_type, crawler, "DrZHpa", "ROWS", 1, 2)
														
 
															-                    time.sleep(0.5)
														
 
															-                    Feishu.update_values(log_type, crawler, "DrZHpa", "F2:Z2",
														
 
															-                                         [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
														
 
															-                                           word_list[i]['title'],
														
 
															-                                           word_list[i]['word'],
														
 
															-                                           "",
														
 
															-                                           "该词暂未收录"]])
														
 
															-                    Common.logger(log_type, crawler).info("写入飞书成功\n")
														
 
															-                elif response.json()['code'] != 0:
														
 
															-                    Common.logger(log_type, crawler).warning(f"{word_wechat_score_dict}")
														
 
															-                    continue
														
 
															-                else:
														
 
															-                    time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
														
 
															-                    for x in range(len(time_index)):
														
 
															-                        Common.logger(log_type, crawler).info(f"正在更新 {word_list[i]['word']}")
														
 
															-                        score_time = time_index[x]['time']
														
 
															-                        score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
														
 
															-                        score = time_index[x]['score']
														
 
															-                        wechat_score_dict = {"score": score, "scoreDate": score_time_str}
														
 
															-                        wechat_score_list.append(wechat_score_dict)
														
 
															-                        Common.logger(log_type, crawler).info(f"wechat_score_dict:{wechat_score_dict}")
														
 
															-                        Feishu.insert_columns(log_type, crawler, "DrZHpa", "ROWS", 1, 2)
														
 
															-                        time.sleep(1)
														
 
															-                        Feishu.update_values(log_type, crawler, "DrZHpa", "F2:Z2", [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
														
 
															-                                                                                     word_list[i]['title'],
														
 
															-                                                                                     word_list[i]['word'],
														
 
															-                                                                                     score_time_str,
														
 
															-                                                                                     score]])
														
 
															-                        Common.logger(log_type, crawler).info("写入飞书成功\n")
														
 
															-                break
														
 
															-
														
 
															-        Feishu.bot(log_type, "weixinzhishu_inner_sort", "微信指数_站内短期指数抓取完毕")
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    Test.get_score_test("inner-sort", "weixinzhishu")
														
 
															-    pass
														
--- a/weixinzhishu/weixinzhishu_main/weixinzhishu_out.py
+++ b/weixinzhishu/weixinzhishu_main/weixinzhishu_out.py
@@ -1,153 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/28
														
 
															-import json
														
 
															-import os
														
 
															-import sys
														
 
															-import time
														
 
															-from datetime import date, timedelta
														
 
															-import requests
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-proxies = {"http": None, "https": None}
														
 
															-
														
 
															-
														
 
															-class Test:
														
 
															-    # 获取微信 key / openid
														
 
															-    @classmethod
														
 
															-    def get_wechat_key(cls, log_type, crawler):
														
 
															-        """
														
 
															-        获取微信 key / openid
														
 
															-        https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
														
 
															-        :param log_type: 日志名
														
 
															-        :param crawler: 哪款爬虫，填写:weixinzhishu
														
 
															-        :return: search_key, openid
														
 
															-        """
														
 
															-        try:
														
 
															-            # while True:
														
 
															-            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
														
 
															-                # if sheet is None:
														
 
															-                #     Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ，10秒钟后重试")
														
 
															-                #     time.sleep(10)
														
 
															-                # else:
														
 
															-                #     break
														
 
															-            for i in range(len(sheet)):
														
 
															-                search_key = sheet[1][1]
														
 
															-                openid = sheet[1][2]
														
 
															-                return search_key, openid
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_words(cls, log_type, crawler):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                sheet = Feishu.get_values_batch(log_type, crawler, 'MvFi8s')
														
 
															-                if sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ，10秒钟后重试")
														
 
															-                    time.sleep(10)
														
 
															-                else:
														
 
															-                    break
														
 
															-            word_list = []
														
 
															-            for i in range(len(sheet)):
														
 
															-                word_dict = {
														
 
															-                    "title": sheet[i][0],
														
 
															-                    "word": sheet[i][1]
														
 
															-                }
														
 
															-                word_list.append(word_dict)
														
 
															-            return word_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_words:{e}\n")
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_score_test(cls, log_type, crawler):
														
 
															-
														
 
															-        start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
														
 
															-        end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
														
 
															-
														
 
															-        word_list = cls.get_words(log_type, crawler)
														
 
															-        for i in range(len(word_list)):
														
 
															-            Common.logger(log_type, crawler).info(f"热词: {word_list[i]['word']}")
														
 
															-            while True:
														
 
															-                wechat_key = cls.get_wechat_key(log_type, crawler)
														
 
															-                if wechat_key is None:
														
 
															-                    Common.logger(log_type, crawler).info(
														
 
															-                        f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} auth 过期，休眠 10 秒，重新获取")
														
 
															-                    time.sleep(10)
														
 
															-                    continue
														
 
															-
														
 
															-                search_key = wechat_key[0]
														
 
															-                openid = wechat_key[-1]
														
 
															-                url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
														
 
															-                payload = json.dumps({
														
 
															-                    "openid": openid,
														
 
															-                    "search_key": search_key,
														
 
															-                    "cgi_name": "GetDefaultIndex",
														
 
															-                    "start_ymd": start_ymd,
														
 
															-                    "end_ymd": end_ymd,
														
 
															-                    "query": word_list[i]['word']
														
 
															-                })
														
 
															-                headers = {
														
 
															-                    'Host': 'search.weixin.qq.com',
														
 
															-                    'content-type': 'application/json',
														
 
															-                    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
														
 
															-                    'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
														
 
															-                }
														
 
															-                response = requests.request("POST", url, headers=headers, data=payload, proxies=proxies)
														
 
															-                if response.json()['code'] == -10000:
														
 
															-                    Common.logger(log_type, crawler).info(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} response:{response.json()['msg']} 休眠 10 秒，重新获取")
														
 
															-                    time.sleep(10)
														
 
															-                    continue
														
 
															-
														
 
															-                wechat_score_list = []
														
 
															-                word_wechat_score_dict = {
														
 
															-                    "id": i+1,
														
 
															-                    "word": word_list[i]['word'],
														
 
															-                    "wechatScores": wechat_score_list,
														
 
															-                }
														
 
															-                if response.json()['code'] == -10002:
														
 
															-                    Common.logger(log_type, crawler).info("该词暂未收录")
														
 
															-                    # 写飞书
														
 
															-                    # if word_list[i]['word'] in [x for y in Feishu.get_values_batch(log_type, crawler, "YVuVgQ") for x in y]:
														
 
															-                    #     Common.logger(log_type, crawler).info("该词已存在")
														
 
															-                    #     continue
														
 
															-                    Feishu.insert_columns(log_type, crawler, "YVuVgQ", "ROWS", 1, 2)
														
 
															-                    time.sleep(0.5)
														
 
															-                    Feishu.update_values(log_type, crawler, "YVuVgQ", "F2:Z2",
														
 
															-                                         [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
														
 
															-                                           word_list[i]['title'],
														
 
															-                                           word_list[i]['word'],
														
 
															-                                           "",
														
 
															-                                           "该词暂未收录"]])
														
 
															-                    Common.logger(log_type, crawler).info("写入飞书成功\n")
														
 
															-                elif response.json()['code'] != 0:
														
 
															-                    Common.logger(log_type, crawler).warning(f"{word_wechat_score_dict}")
														
 
															-                    continue
														
 
															-                else:
														
 
															-                    time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
														
 
															-                    for x in range(len(time_index)):
														
 
															-                        Common.logger(log_type, crawler).info(f"正在更新 {word_list[i]['word']}")
														
 
															-                        score_time = time_index[x]['time']
														
 
															-                        score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
														
 
															-                        score = time_index[x]['score']
														
 
															-                        wechat_score_dict = {"score": score, "scoreDate": score_time_str}
														
 
															-                        wechat_score_list.append(wechat_score_dict)
														
 
															-                        Common.logger(log_type, crawler).info(f"wechat_score_dict:{wechat_score_dict}")
														
 
															-                        Feishu.insert_columns(log_type, crawler, "YVuVgQ", "ROWS", 1, 2)
														
 
															-                        time.sleep(1)
														
 
															-                        Feishu.update_values(log_type, crawler, "YVuVgQ", "F2:Z2", [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
														
 
															-                                                                                     word_list[i]['title'],
														
 
															-                                                                                     word_list[i]['word'],
														
 
															-                                                                                     score_time_str,
														
 
															-                                                                                     score]])
														
 
															-                        Common.logger(log_type, crawler).info("写入飞书成功\n")
														
 
															-                break
														
 
															-
														
 
															-        Feishu.bot(log_type, "weixinzhishu_out", "微信指数_站外指数抓取完毕")
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    # print(Test.get_words("test", "weixinzhishu"))
														
 
															-    Test.get_score_test("out", "weixinzhishu")
														
 
															-    pass
														
--- a/xiaoniangao/xiaoniangao_follow/__init__.py
+++ b/xiaoniangao/xiaoniangao_follow/__init__.py
@@ -1,3 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/3/13
														
--- a/xiaoniangao/xiaoniangao_follow/xiaoniangao_follow.py
+++ b/xiaoniangao/xiaoniangao_follow/xiaoniangao_follow.py
--- a/xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py
+++ b/xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py
@@ -1,685 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/3/15
														
 
															-import datetime
														
 
															-import json
														
 
															-import os
														
 
															-import random
														
 
															-import shutil
														
 
															-import sys
														
 
															-import time
														
 
															-import requests
														
 
															-import urllib3
														
 
															-
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-from common.publish import Publish
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from common.public import get_config_from_mysql
														
 
															-
														
 
															-proxies = {"http": None, "https": None}
														
 
															-
														
 
															-
														
 
															-class XiaoniangaoHour:
														
 
															-    platform = "小年糕"
														
 
															-
														
 
															-    words = "abcdefghijklmnopqrstuvwxyz0123456789"
														
 
															-    uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
														
 
															-    token = "".join(random.sample(words, 32))
														
 
															-    uid_token_dict = {
														
 
															-        "uid": uid,
														
 
															-        "token": token
														
 
															-    }
														
 
															-
														
 
															-    # 生成 uid、token
														
 
															-    @classmethod
														
 
															-    def get_uid_token(cls):
														
 
															-        words = "abcdefghijklmnopqrstuvwxyz0123456789"
														
 
															-        uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
														
 
															-        token = "".join(random.sample(words, 32))
														
 
															-        uid_token_dict = {
														
 
															-            "uid": uid,
														
 
															-            "token": token
														
 
															-        }
														
 
															-        return uid_token_dict
														
 
															-
														
 
															-    # 基础门槛规则
														
 
															-    @staticmethod
														
 
															-    def download_rule(video_dict):
														
 
															-        """
														
 
															-        下载视频的基本规则
														
 
															-        :param video_dict: 视频信息，字典格式
														
 
															-        :return: 满足规则，返回 True；反之，返回 False
														
 
															-        """
														
 
															-        # 视频时长
														
 
															-        if int(float(video_dict["duration"])) >= 40:
														
 
															-            # 宽或高
														
 
															-            if int(video_dict["video_width"]) >= 0 or int(video_dict["video_height"]) >= 0:
														
 
															-                # 播放量
														
 
															-                if int(video_dict["play_cnt"]) >= 4000:
														
 
															-                    # 点赞量
														
 
															-                    if int(video_dict["like_cnt"]) >= 0:
														
 
															-                        # 分享量
														
 
															-                        if int(video_dict["share_cnt"]) >= 0:
														
 
															-                            # 发布时间 <= 10 天
														
 
															-                            if int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * 10:
														
 
															-                                return True
														
 
															-                            else:
														
 
															-                                return False
														
 
															-                        else:
														
 
															-                            return False
														
 
															-                    else:
														
 
															-                        return False
														
 
															-                else:
														
 
															-                    return False
														
 
															-            return False
														
 
															-        return False
														
 
															-
														
 
															-    # 获取表情及符号
														
 
															-    @classmethod
														
 
															-    def get_expression(cls):
														
 
															-        # 表情列表
														
 
															-        expression_list = ['📍', '⭕️', '🔥', '📣', '🎈', '⚡', '🔔', '🚩', '💢', '💎', '👉', '💓', '❗️', '🔴', '🔺', '♦️', '♥️', '👉',
														
 
															-                           '👈', '🏆', '❤️\u200d🔥']
														
 
															-        # 符号列表
														
 
															-        char_list = ['...', '~~']
														
 
															-        return expression_list, char_list
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_video(cls, log_type, crawler, video_id, env):
														
 
															-        sql = f""" select * from crawler_video where platform="小年糕" and out_video_id="{video_id}"; """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_hour(cls, log_type, crawler, video_id, env):
														
 
															-        sql = f""" select * from crawler_xiaoniangao_hour where platform="小年糕" and out_video_id="{video_id}"; """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    # 获取列表
														
 
															-    @classmethod
														
 
															-    def get_videoList(cls, log_type, crawler, env):
														
 
															-        # try:
														
 
															-        uid_token_dict = cls.uid_token_dict
														
 
															-        url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
														
 
															-        headers = {
														
 
															-            # "x-b3-traceid": cls.hour_x_b3_traceid,
														
 
															-            "x-b3-traceid": '1c403a4aa72e3c',
														
 
															-            # "X-Token-Id": cls.hour_x_token_id,
														
 
															-            "X-Token-Id": 'ab619e96d801f1567388629260aa68ec-1202200806',
														
 
															-            # "uid": cls.hour_uid,
														
 
															-            "uid": uid_token_dict['uid'],
														
 
															-            "content-type": "application/json",
														
 
															-            "Accept-Encoding": "gzip,compress,br,deflate",
														
 
															-            "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
														
 
															-                          ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
														
 
															-                          'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
														
 
															-            # "Referer": cls.hour_referer
														
 
															-            "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/624/page-frame.html'
														
 
															-        }
														
 
															-        data = {
														
 
															-            "log_params": {
														
 
															-                "page": "discover_rec",
														
 
															-                "common": {
														
 
															-                    "brand": "iPhone",
														
 
															-                    "device": "iPhone 11",
														
 
															-                    "os": "iOS 14.7.1",
														
 
															-                    "weixinver": "8.0.20",
														
 
															-                    "srcver": "2.24.2",
														
 
															-                    "net": "wifi",
														
 
															-                    "scene": 1089
														
 
															-                }
														
 
															-            },
														
 
															-            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
														
 
															-            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
														
 
															-            "share_width": 625,
														
 
															-            "share_height": 500,
														
 
															-            "ext": {
														
 
															-                "fmid": 0,
														
 
															-                "items": {}
														
 
															-            },
														
 
															-            "app": "xng",
														
 
															-            "rec_scene": "discover_rec",
														
 
															-            "log_common_params": {
														
 
															-                "e": [{
														
 
															-                    "data": {
														
 
															-                        "page": "discoverIndexPage",
														
 
															-                        "topic": "recommend"
														
 
															-                    },
														
 
															-                    "ab": {}
														
 
															-                }],
														
 
															-                "ext": {
														
 
															-                    "brand": "iPhone",
														
 
															-                    "device": "iPhone 11",
														
 
															-                    "os": "iOS 14.7.1",
														
 
															-                    "weixinver": "8.0.20",
														
 
															-                    "srcver": "2.24.3",
														
 
															-                    "net": "wifi",
														
 
															-                    "scene": "1089"
														
 
															-                },
														
 
															-                "pj": "1",
														
 
															-                "pf": "2",
														
 
															-                "session_id": "7bcce313-b57d-4305-8d14-6ebd9a1bad29"
														
 
															-            },
														
 
															-            "refresh": False,
														
 
															-            "token": uid_token_dict["token"],
														
 
															-            "uid": uid_token_dict["uid"],
														
 
															-            "proj": "ma",
														
 
															-            "wx_ver": "8.0.20",
														
 
															-            "code_ver": "3.62.0"
														
 
															-        }
														
 
															-        urllib3.disable_warnings()
														
 
															-        r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
														
 
															-        if 'data' not in r.text or r.status_code != 200:
														
 
															-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
														
 
															-            return
														
 
															-        elif "data" not in r.json():
														
 
															-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()}\n")
														
 
															-            return
														
 
															-        elif "list" not in r.json()["data"]:
														
 
															-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}\n")
														
 
															-            return
														
 
															-        elif len(r.json()['data']['list']) == 0:
														
 
															-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}\n")
														
 
															-            return
														
 
															-        else:
														
 
															-            # 视频列表数据
														
 
															-            feeds = r.json()["data"]["list"]
														
 
															-            for i in range(len(feeds)):
														
 
															-                # 标题，表情随机加在片头、片尾，或替代句子中间的标点符号
														
 
															-                if "title" in feeds[i]:
														
 
															-                    befor_video_title = feeds[i]["title"].strip().replace("\n", "") \
														
 
															-                        .replace("/", "").replace("\r", "").replace("#", "") \
														
 
															-                        .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
														
 
															-                        .replace(":", "").replace("*", "").replace("？", "") \
														
 
															-                        .replace("?", "").replace('"', "").replace("<", "") \
														
 
															-                        .replace(">", "").replace("|", "").replace(" ", "").replace("#表情", "").replace("#符号","").replace(
														
 
															-                        '"', '').replace("'", '').replace('"', '').replace("'", '')
														
 
															-
														
 
															-                    expression = cls.get_expression()
														
 
															-                    expression_list = expression[0]
														
 
															-                    char_list = expression[1]
														
 
															-                    # 随机取一个表情
														
 
															-                    expression = random.choice(expression_list)
														
 
															-                    # 生成标题list[表情+title, title+表情]
														
 
															-                    expression_title_list = [expression + befor_video_title, befor_video_title + expression]
														
 
															-                    # 从标题list中随机取一个标题
														
 
															-                    title_list1 = random.choice(expression_title_list)
														
 
															-                    # 生成标题:原标题+符号
														
 
															-                    title_list2 = befor_video_title + random.choice(char_list)
														
 
															-                    # 表情和标题组合，与标题和符号组合，汇总成待使用的标题列表
														
 
															-                    title_list4 = [title_list2, title_list1]
														
 
															-                    # 最终标题
														
 
															-                    video_title = random.choice(title_list4)
														
 
															-                else:
														
 
															-                    video_title = 0
														
 
															-
														
 
															-                # 视频 ID
														
 
															-                if "vid" in feeds[i]:
														
 
															-                    video_id = feeds[i]["vid"]
														
 
															-                else:
														
 
															-                    video_id = 0
														
 
															-
														
 
															-                # 播放量
														
 
															-                if "play_pv" in feeds[i]:
														
 
															-                    video_play_cnt = feeds[i]["play_pv"]
														
 
															-                else:
														
 
															-                    video_play_cnt = 0
														
 
															-
														
 
															-                # 点赞量
														
 
															-                if "favor" in feeds[i]:
														
 
															-                    video_like_cnt = feeds[i]["favor"]["total"]
														
 
															-                else:
														
 
															-                    video_like_cnt = 0
														
 
															-
														
 
															-                # 评论数
														
 
															-                if "comment_count" in feeds[i]:
														
 
															-                    video_comment_cnt = feeds[i]["comment_count"]
														
 
															-                else:
														
 
															-                    video_comment_cnt = 0
														
 
															-
														
 
															-                # 分享量
														
 
															-                if "share" in feeds[i]:
														
 
															-                    video_share_cnt = feeds[i]["share"]
														
 
															-                else:
														
 
															-                    video_share_cnt = 0
														
 
															-
														
 
															-                # 时长
														
 
															-                if "du" in feeds[i]:
														
 
															-                    video_duration = int(feeds[i]["du"] / 1000)
														
 
															-                else:
														
 
															-                    video_duration = 0
														
 
															-
														
 
															-                # 宽和高
														
 
															-                if "w" or "h" in feeds[i]:
														
 
															-                    video_width = feeds[i]["w"]
														
 
															-                    video_height = feeds[i]["h"]
														
 
															-                else:
														
 
															-                    video_width = 0
														
 
															-                    video_height = 0
														
 
															-
														
 
															-                # 发布时间
														
 
															-                if "t" in feeds[i]:
														
 
															-                    video_send_time = feeds[i]["t"]
														
 
															-                else:
														
 
															-                    video_send_time = 0
														
 
															-                publish_time_stamp = int(int(video_send_time) / 1000)
														
 
															-                publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
														
 
															-
														
 
															-                # 用户名 / 头像
														
 
															-                if "user" in feeds[i]:
														
 
															-                    user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
														
 
															-                        .replace("/", "").replace("快手", "").replace(" ", "") \
														
 
															-                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
														
 
															-                    head_url = feeds[i]["user"]["hurl"]
														
 
															-                else:
														
 
															-                    user_name = 0
														
 
															-                    head_url = 0
														
 
															-
														
 
															-                # 用户 ID
														
 
															-                profile_id = feeds[i]["id"]
														
 
															-
														
 
															-                # 用户 mid
														
 
															-                profile_mid = feeds[i]["user"]["mid"]
														
 
															-
														
 
															-                # 视频封面
														
 
															-                if "url" in feeds[i]:
														
 
															-                    cover_url = feeds[i]["url"]
														
 
															-                else:
														
 
															-                    cover_url = 0
														
 
															-
														
 
															-                # 视频播放地址
														
 
															-                if "v_url" in feeds[i]:
														
 
															-                    video_url = feeds[i]["v_url"]
														
 
															-                else:
														
 
															-                    video_url = 0
														
 
															-
														
 
															-                video_dict = {
														
 
															-                    "video_title": video_title,
														
 
															-                    "video_id": video_id,
														
 
															-                    "duration": video_duration,
														
 
															-                    "play_cnt": video_play_cnt,
														
 
															-                    "like_cnt": video_like_cnt,
														
 
															-                    "comment_cnt": video_comment_cnt,
														
 
															-                    "share_cnt": video_share_cnt,
														
 
															-                    "user_name": user_name,
														
 
															-                    "publish_time_stamp": publish_time_stamp,
														
 
															-                    "publish_time_str": publish_time_str,
														
 
															-                    "video_width": video_width,
														
 
															-                    "video_height": video_height,
														
 
															-                    "avatar_url": head_url,
														
 
															-                    "profile_id": profile_id,
														
 
															-                    "profile_mid": profile_mid,
														
 
															-                    "cover_url": cover_url,
														
 
															-                    "video_url": video_url,
														
 
															-                    "session": f"xiaoniangao-hour-{int(time.time())}"
														
 
															-                }
														
 
															-                for k, v in video_dict.items():
														
 
															-                    Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-
														
 
															-                # 过滤无效视频
														
 
															-                if video_title == 0 or video_id == 0 or video_duration == 0 \
														
 
															-                        or video_send_time == 0 or user_name == 0 or head_url == 0 \
														
 
															-                        or cover_url == 0 or video_url == 0:
														
 
															-                    Common.logger(log_type, crawler).warning("无效视频\n")
														
 
															-                # 抓取基础规则过滤
														
 
															-                elif cls.download_rule(video_dict) is False:
														
 
															-                    Common.logger(log_type, crawler).info("不满足基础门槛规则\n")
														
 
															-                elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
														
 
															-                    Common.logger(log_type, crawler).info('视频已下载\n')
														
 
															-                elif any(str(word) if str(word) in video_dict['video_title'] else False for word in
														
 
															-                         get_config_from_mysql(log_type=log_type,
														
 
															-                                               source=crawler,
														
 
															-                                               env=env,
														
 
															-                                               text="filter",
														
 
															-                                               action="")) is True:
														
 
															-                    Common.logger(log_type, crawler).info("视频已中过滤词\n")
														
 
															-                    time.sleep(1)
														
 
															-                else:
														
 
															-                    # 写入飞书小时级feeds数据库表
														
 
															-                    insert_sql = f""" insert into crawler_xiaoniangao_hour(profile_id,
														
 
															-                    profile_mid,
														
 
															-                    platform,
														
 
															-                    out_video_id,
														
 
															-                    video_title,
														
 
															-                    user_name,
														
 
															-                    cover_url,
														
 
															-                    video_url,
														
 
															-                    duration,
														
 
															-                    publish_time,
														
 
															-                    play_cnt,
														
 
															-                    crawler_time_stamp,
														
 
															-                    crawler_time)
														
 
															-                    values({profile_id},
														
 
															-                    {profile_mid},
														
 
															-                    "{cls.platform}",
														
 
															-                    "{video_id}",
														
 
															-                    "{video_title}",
														
 
															-                    "{user_name}",
														
 
															-                    "{cover_url}",
														
 
															-                    "{video_url}",
														
 
															-                    {video_duration},
														
 
															-                    "{publish_time_str}",
														
 
															-                    {video_play_cnt},
														
 
															-                    {int(time.time())},
														
 
															-                    "{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))}"
														
 
															-                    )"""
														
 
															-                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-                    MysqlHelper.update_values(log_type, crawler, insert_sql, env)
														
 
															-                    Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_video_info(cls, log_type, crawler, p_id, p_mid, v_title, v_id):
														
 
															-        # try:
														
 
															-        uid_token_dict = cls.uid_token_dict
														
 
															-        url = "https://kapi.xiaoniangao.cn/profile/get_profile_by_id"
														
 
															-        headers = {
														
 
															-            # "x-b3-traceid": cls.hour_x_b3_traceid,
														
 
															-            "x-b3-traceid": '1c403a4aa72e3c',
														
 
															-            # "X-Token-Id": cls.hour_x_token_id,
														
 
															-            "X-Token-Id": 'ab619e96d801f1567388629260aa68ec-1202200806',
														
 
															-            "uid": uid_token_dict['uid'],
														
 
															-            "content-type": "application/json",
														
 
															-            "Accept-Encoding": "gzip,compress,br,deflate",
														
 
															-            "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
														
 
															-                          ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
														
 
															-                          'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
														
 
															-            # "Referer": cls.hour_referer
														
 
															-            "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/624/page-frame.html'
														
 
															-        }
														
 
															-        data = {
														
 
															-            "play_src": "1",
														
 
															-            "profile_id": int(p_id),
														
 
															-            "profile_mid": int(p_mid),
														
 
															-            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/"
														
 
															-                  "!400x400r/crop/400x400/interlace/1/format/jpg",
														
 
															-            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail"
														
 
															-                    "/!80x80r/crop/80x80/interlace/1/format/jpg",
														
 
															-            "share_width": 625,
														
 
															-            "share_height": 500,
														
 
															-            "no_comments": True,
														
 
															-            "no_follow": True,
														
 
															-            "vid": v_id,
														
 
															-            "hot_l1_comment": True,
														
 
															-            # "token": cls.hour_token,
														
 
															-            "token": uid_token_dict['token'],
														
 
															-            # "uid": cls.hour_uid,
														
 
															-            "uid": uid_token_dict['uid'],
														
 
															-            "proj": "ma",
														
 
															-            "wx_ver": "8.0.20",
														
 
															-            "code_ver": "3.62.0",
														
 
															-            "log_common_params": {
														
 
															-                "e": [{
														
 
															-                    "data": {
														
 
															-                        "page": "dynamicSharePage"
														
 
															-                    }
														
 
															-                }],
														
 
															-                "ext": {
														
 
															-                    "brand": "iPhone",
														
 
															-                    "device": "iPhone 11",
														
 
															-                    "os": "iOS 14.7.1",
														
 
															-                    "weixinver": "8.0.20",
														
 
															-                    "srcver": "2.24.3",
														
 
															-                    "net": "wifi",
														
 
															-                    "scene": "1089"
														
 
															-                },
														
 
															-                "pj": "1",
														
 
															-                "pf": "2",
														
 
															-                "session_id": "7bcce313-b57d-4305-8d14-6ebd9a1bad29"
														
 
															-            }
														
 
															-        }
														
 
															-        urllib3.disable_warnings()
														
 
															-        r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
														
 
															-        if r.status_code != 200 or 'data' not in r.text:
														
 
															-            Common.logger(log_type, crawler).warning(f"get_videoInfo:{r.text}\n")
														
 
															-        else:
														
 
															-            hour_play_cnt = r.json()["data"]["play_pv"]
														
 
															-            hour_cover_url = r.json()["data"]["url"]
														
 
															-            hour_video_url = r.json()["data"]["v_url"]
														
 
															-            hour_video_duration = r.json()["data"]["du"]
														
 
															-            hour_video_comment_cnt = r.json()["data"]["comment_count"]
														
 
															-            hour_video_like_cnt = r.json()["data"]["favor"]["total"]
														
 
															-            hour_video_share_cnt = r.json()["data"]["share"]
														
 
															-            hour_video_width = r.json()["data"]["w"]
														
 
															-            hour_video_height = r.json()["data"]["h"]
														
 
															-            hour_video_send_time = r.json()["data"]["t"]
														
 
															-            publish_time_stamp = int(int(hour_video_send_time) / 1000)
														
 
															-            publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
														
 
															-            hour_user_name = r.json()["data"]["user"]["nick"]
														
 
															-            hour_head_url = r.json()["data"]["user"]["hurl"]
														
 
															-            video_info_dict = {
														
 
															-                "video_id": v_id,
														
 
															-                "video_title": v_title,
														
 
															-                "duration": hour_video_duration,
														
 
															-                "play_cnt": hour_play_cnt,
														
 
															-                "like_cnt": hour_video_like_cnt,
														
 
															-                "comment_cnt": hour_video_comment_cnt,
														
 
															-                "share_cnt": hour_video_share_cnt,
														
 
															-                "user_name": hour_user_name,
														
 
															-                "publish_time_stamp": publish_time_stamp,
														
 
															-                "publish_time_str": publish_time_str,
														
 
															-                "video_width": hour_video_width,
														
 
															-                "video_height": hour_video_height,
														
 
															-                "avatar_url": hour_head_url,
														
 
															-                "profile_id": p_id,
														
 
															-                "profile_mid": p_mid,
														
 
															-                "cover_url": hour_cover_url,
														
 
															-                "video_url": hour_video_url,
														
 
															-                "session": f"xiaoniangao-hour-{int(time.time())}"
														
 
															-            }
														
 
															-            return video_info_dict
														
 
															-
														
 
															-    # 更新小时榜数据
														
 
															-    @classmethod
														
 
															-    def update_videoList(cls, log_type, crawler, strategy, oss_endpoint, env):
														
 
															-        """
														
 
															-        更新小时榜数据
														
 
															-        """
														
 
															-        # try:
														
 
															-        befor_yesterday = (datetime.date.today() + datetime.timedelta(days=-3)).strftime("%Y-%m-%d %H:%M:%S")
														
 
															-        update_time_stamp = int(time.mktime(time.strptime(befor_yesterday, "%Y-%m-%d %H:%M:%S")))
														
 
															-        select_sql = f""" select * from crawler_xiaoniangao_hour where crawler_time_stamp >= {update_time_stamp} GROUP BY out_video_id """
														
 
															-        update_video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env)
														
 
															-        if len(update_video_list) == 0:
														
 
															-            Common.logger(log_type, crawler).info("暂无需要更新的小时榜数据\n")
														
 
															-            return
														
 
															-        for update_video_info in update_video_list:
														
 
															-            profile_id = update_video_info["profile_id"]
														
 
															-            profile_mid = update_video_info["profile_mid"]
														
 
															-            video_title = update_video_info["video_title"]
														
 
															-            video_id = update_video_info["out_video_id"]
														
 
															-            if datetime.datetime.now().hour == 10 and datetime.datetime.now().minute <= 10:
														
 
															-                video_info_dict = cls.get_video_info(log_type=log_type,
														
 
															-                                                     crawler=crawler,
														
 
															-                                                     p_id=profile_id,
														
 
															-                                                     p_mid=profile_mid,
														
 
															-                                                     v_title=video_title,
														
 
															-                                                     v_id=video_id)
														
 
															-                ten_play_cnt = video_info_dict['play_cnt']
														
 
															-                Common.logger(log_type, crawler).info(f"ten_play_cnt:{ten_play_cnt}")
														
 
															-                update_sql = f""" update crawler_xiaoniangao_hour set ten_play_cnt={ten_play_cnt} WHERE out_video_id="{video_id}"; """
														
 
															-                # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
														
 
															-                MysqlHelper.update_values(log_type, crawler, update_sql, env)
														
 
															-                cls.download_publish(log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint,
														
 
															-                                     env)
														
 
															-            elif datetime.datetime.now().hour == 15 and datetime.datetime.now().minute <= 10:
														
 
															-                video_info_dict = cls.get_video_info(log_type=log_type,
														
 
															-                                                     crawler=crawler,
														
 
															-                                                     p_id=profile_id,
														
 
															-                                                     p_mid=profile_mid,
														
 
															-                                                     v_title=video_title,
														
 
															-                                                     v_id=video_id)
														
 
															-                fifteen_play_cnt = video_info_dict['play_cnt']
														
 
															-                Common.logger(log_type, crawler).info(f"fifteen_play_cnt:{fifteen_play_cnt}")
														
 
															-                update_sql = f""" update crawler_xiaoniangao_hour set fifteen_play_cnt={fifteen_play_cnt} WHERE out_video_id="{video_id}"; """
														
 
															-                # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
														
 
															-                MysqlHelper.update_values(log_type, crawler, update_sql, env)
														
 
															-                cls.download_publish(log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint,
														
 
															-                                     env)
														
 
															-            elif datetime.datetime.now().hour == 20 and datetime.datetime.now().minute <= 10:
														
 
															-                video_info_dict = cls.get_video_info(log_type=log_type,
														
 
															-                                                     crawler=crawler,
														
 
															-                                                     p_id=profile_id,
														
 
															-                                                     p_mid=profile_mid,
														
 
															-                                                     v_title=video_title,
														
 
															-                                                     v_id=video_id)
														
 
															-                twenty_play_cnt = video_info_dict['play_cnt']
														
 
															-                Common.logger(log_type, crawler).info(f"twenty_play_cnt:{twenty_play_cnt}")
														
 
															-                update_sql = f""" update crawler_xiaoniangao_hour set twenty_play_cnt={twenty_play_cnt} WHERE out_video_id="{video_id}"; """
														
 
															-                # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
														
 
															-                MysqlHelper.update_values(log_type, crawler, update_sql, env)
														
 
															-                cls.download_publish(log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint,
														
 
															-                                     env)
														
 
															-            else:
														
 
															-                pass
														
 
															-
														
 
															-    @classmethod
														
 
															-    def download(cls, log_type, crawler, video_info_dict, strategy, oss_endpoint, env):
														
 
															-        # 下载封面
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text="cover", title=video_info_dict["video_title"],
														
 
															-                               url=video_info_dict["cover_url"])
														
 
															-        # 下载视频
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text="video", title=video_info_dict["video_title"],
														
 
															-                               url=video_info_dict["video_url"])
														
 
															-        # 保存视频信息至 "./videos/{download_video_title}/info.txt"
														
 
															-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_info_dict)
														
 
															-
														
 
															-        # 上传视频
														
 
															-        Common.logger(log_type, crawler).info("开始上传视频...")
														
 
															-        our_video_id = Publish.upload_and_publish(log_type=log_type,
														
 
															-                                                  crawler=crawler,
														
 
															-                                                  strategy=strategy,
														
 
															-                                                  our_uid="hour",
														
 
															-                                                  env=env,
														
 
															-                                                  oss_endpoint=oss_endpoint)
														
 
															-        if env == "dev":
														
 
															-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        else:
														
 
															-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        Common.logger(log_type, crawler).info("视频上传完成")
														
 
															-
														
 
															-        if our_video_id is None:
														
 
															-            # 删除视频文件夹
														
 
															-            shutil.rmtree(f"./{crawler}/videos/{video_info_dict['video_title']}")
														
 
															-            return
														
 
															-
														
 
															-        # 视频信息保存数据库
														
 
															-        rule_dict = {
														
 
															-            "duration": {"min": 40},
														
 
															-            "play_cnt": {"min": 4000},
														
 
															-            "publish_day": {"min": 10}
														
 
															-        }
														
 
															-
														
 
															-        insert_sql = f""" insert into crawler_video(video_id,
														
 
															-                                                        out_user_id,
														
 
															-                                                        platform,
														
 
															-                                                        strategy,
														
 
															-                                                        out_video_id,
														
 
															-                                                        video_title,
														
 
															-                                                        cover_url,
														
 
															-                                                        video_url,
														
 
															-                                                        duration,
														
 
															-                                                        publish_time,
														
 
															-                                                        play_cnt,
														
 
															-                                                        crawler_rule,
														
 
															-                                                        width,
														
 
															-                                                        height)
														
 
															-                                                        values({our_video_id},
														
 
															-                                                        "{video_info_dict['profile_id']}",
														
 
															-                                                        "{cls.platform}",
														
 
															-                                                        "小时榜爬虫策略",
														
 
															-                                                        "{video_info_dict['video_id']}",
														
 
															-                                                        "{video_info_dict['video_title']}",
														
 
															-                                                        "{video_info_dict['cover_url']}",
														
 
															-                                                        "{video_info_dict['video_url']}",
														
 
															-                                                        {int(video_info_dict['duration'])},
														
 
															-                                                        "{video_info_dict['publish_time_str']}",
														
 
															-                                                        {int(video_info_dict['play_cnt'])},
														
 
															-                                                        '{json.dumps(rule_dict)}',
														
 
															-                                                        {int(video_info_dict['video_width'])},
														
 
															-                                                        {int(video_info_dict['video_height'])}) """
														
 
															-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-        MysqlHelper.update_values(log_type, crawler, insert_sql, env)
														
 
															-        Common.logger(log_type, crawler).info('视频信息插入数据库成功！')
														
 
															-
														
 
															-        # 视频写入飞书
														
 
															-        Feishu.insert_columns(log_type, crawler, "yatRv2", "ROWS", 1, 2)
														
 
															-        # 视频ID工作表，首行写入数据
														
 
															-        upload_time = int(time.time())
														
 
															-        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
														
 
															-                   "小时级上升榜",
														
 
															-                   str(video_info_dict['video_id']),
														
 
															-                   str(video_info_dict['video_title']),
														
 
															-                   our_video_link,
														
 
															-                   video_info_dict['play_cnt'],
														
 
															-                   video_info_dict['comment_cnt'],
														
 
															-                   video_info_dict['like_cnt'],
														
 
															-                   video_info_dict['share_cnt'],
														
 
															-                   video_info_dict['duration'],
														
 
															-                   f"{video_info_dict['video_width']}*{video_info_dict['video_height']}",
														
 
															-                   str(video_info_dict['publish_time_str'].replace("-", "/")),
														
 
															-                   str(video_info_dict['user_name']),
														
 
															-                   str(video_info_dict['profile_id']),
														
 
															-                   str(video_info_dict['profile_mid']),
														
 
															-                   str(video_info_dict['avatar_url']),
														
 
															-                   str(video_info_dict['cover_url']),
														
 
															-                   str(video_info_dict['video_url'])]]
														
 
															-        time.sleep(1)
														
 
															-        Feishu.update_values(log_type, crawler, "yatRv2", "F2:Z2", values)
														
 
															-        Common.logger(log_type, crawler).info('视频信息写入飞书成功\n')
														
 
															-
														
 
															-    # 下载/上传
														
 
															-    @classmethod
														
 
															-    def download_publish(cls, log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint, env):
														
 
															-        # try:
														
 
															-        if cls.repeat_video(log_type, crawler, video_info_dict["video_id"], env) != 0:
														
 
															-            Common.logger(log_type, crawler).info('视频已下载\n')
														
 
															-        # 播放量大于 50000，直接下载
														
 
															-        elif int(video_info_dict["play_cnt"]) >= 30000:
														
 
															-            Common.logger(log_type, crawler).info(
														
 
															-                f"播放量:{video_info_dict['play_cnt']} >= 30000，满足下载规则，开始下载视频")
														
 
															-            cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
														
 
															-
														
 
															-        # 上升榜判断逻辑，任意时间段上升量>=5000，连续两个时间段上升量>=2000
														
 
															-        elif int(update_video_info['ten_play_cnt']) >= 3000 or int(
														
 
															-                update_video_info['fifteen_play_cnt']) >= 3000 or int(update_video_info['twenty_play_cnt']) >= 3000:
														
 
															-            Common.logger(log_type, crawler).info(
														
 
															-                f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
														
 
															-            Common.logger(log_type, crawler).info("满足下载规则，开始下载视频")
														
 
															-            cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
														
 
															-
														
 
															-        elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['fifteen_play_cnt']) >= 1000:
														
 
															-            Common.logger(log_type, crawler).info(
														
 
															-                f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
														
 
															-            Common.logger(log_type, crawler).info("满足下载规则，开始下载视频")
														
 
															-            cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
														
 
															-
														
 
															-        elif int(update_video_info['fifteen_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
														
 
															-            Common.logger(log_type, crawler).info(
														
 
															-                f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
														
 
															-            Common.logger(log_type, crawler).info("满足下载规则，开始下载视频")
														
 
															-            cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
														
 
															-
														
 
															-        elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
														
 
															-            Common.logger(log_type, crawler).info(
														
 
															-                f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
														
 
															-            Common.logger(log_type, crawler).info("满足下载规则，开始下载视频")
														
 
															-            cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
														
 
															-
														
 
															-        else:
														
 
															-            Common.logger(log_type, crawler).info("上升量不满足下载规则")
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    # print(XiaoniangaoHour.get_expression())
														
 
															-    # print(XiaoniangaoHour.get_uid_token())
														
 
															-    # XiaoniangaoHour.get_videoList("test", "xiaoniangao", "dev")
														
 
															-    # XiaoniangaoHour.update_videoList("test", "xiaoniangao", "小时榜爬虫策略", "out", "dev")
														
 
															-    # befor_yesterday = (datetime.date.today() + datetime.timedelta(days=-3)).strftime("%Y-%m-%d %H:%M:%S")
														
 
															-    # update_time_stamp = int(time.mktime(time.strptime(befor_yesterday, "%Y-%m-%d %H:%M:%S")))
														
 
															-    # print(update_time_stamp)
														
 
															-    # print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))))
														
 
															-    print(XiaoniangaoHour.uid_token_dict)
														
 
															-    pass
														
--- a/xiaoniangao/xiaoniangao_main/run_xiaoniangao_author_scheduling.py
+++ b/xiaoniangao/xiaoniangao_main/run_xiaoniangao_author_scheduling.py
@@ -1,43 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/4/20
														
 
															-import argparse
														
 
															-import os
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.public import task_fun
														
 
															-from common.common import Common
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from xiaoniangao.xiaoniangao_author.xiaoniangao_author_scheduling import XiaoniangaoAuthorScheduling
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, task, env):
														
 
															-    task_dict = task_fun(task)['task_dict']
														
 
															-    rule_dict = task_fun(task)['rule_dict']
														
 
															-    task_id = task_dict['task_id']
														
 
															-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
														
 
															-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
														
 
															-    Common.logger(log_type, crawler).info(f"调度任务:\n{task_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f"抓取规则:\n{rule_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f"用户列表:\n{user_list}")
														
 
															-    Common.logger(log_type, crawler).info('开始抓取 小年糕 定向榜\n')
														
 
															-    XiaoniangaoAuthorScheduling.get_author_videos(log_type=log_type,
														
 
															-                                                  crawler=crawler,
														
 
															-                                                  user_list=user_list,
														
 
															-                                                  rule_dict=rule_dict,
														
 
															-                                                  env=env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取完一轮\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--task')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         task=args.task,
														
 
															-         env=args.env)
														
--- a/xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py
+++ b/xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py
@@ -1,35 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/3/13
														
 
															-import argparse
														
 
															-import os
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from xiaoniangao.xiaoniangao_follow.xiaoniangao_follow import XiaoniangaoFollow
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, env):
														
 
															-    if env == "dev":
														
 
															-        oss_endpoint = "out"
														
 
															-    else:
														
 
															-        oss_endpoint = "inner"
														
 
															-    Common.logger(log_type, crawler).info('开始抓取 小年糕 定向榜\n')
														
 
															-    XiaoniangaoFollow.get_follow_videos(log_type=log_type,
														
 
															-                                        crawler=crawler,
														
 
															-                                        strategy="定向爬虫策略",
														
 
															-                                        oss_endpoint=oss_endpoint,
														
 
															-                                        env=env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取完一轮\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         env=args.env)
														
--- a/xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py
+++ b/xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py
@@ -1,55 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/3/15
														
 
															-import argparse
														
 
															-import datetime
														
 
															-import os
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from xiaoniangao.xiaoniangao_hour.xiaoniangao_hour import XiaoniangaoHour
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, env):
														
 
															-    if env == "dev":
														
 
															-        oss_endpoint = "out"
														
 
															-    else:
														
 
															-        oss_endpoint = "inner"
														
 
															-    # 获取符合规则的视频，写入小时级数据_feeds
														
 
															-    XiaoniangaoHour.get_videoList(log_type, crawler, env)
														
 
															-    now = datetime.datetime.now()
														
 
															-    if now.hour == 10 and 0 <= now.minute <= 10:
														
 
															-        Common.logger(log_type, crawler).info("开始更新/下载上升榜")
														
 
															-        XiaoniangaoHour.update_videoList(log_type=log_type,
														
 
															-                                         crawler=crawler,
														
 
															-                                         strategy="小时榜爬虫策略",
														
 
															-                                         oss_endpoint=oss_endpoint,
														
 
															-                                         env=env)
														
 
															-
														
 
															-    elif now.hour == 15 and now.minute <= 10:
														
 
															-        Common.logger(log_type, crawler).info("开始更新/下载上升榜")
														
 
															-        XiaoniangaoHour.update_videoList(log_type=log_type,
														
 
															-                                         crawler=crawler,
														
 
															-                                         strategy="小时榜爬虫策略",
														
 
															-                                         oss_endpoint=oss_endpoint,
														
 
															-                                         env=env)
														
 
															-
														
 
															-    elif now.hour == 20 and now.minute <= 10:
														
 
															-        Common.logger(log_type, crawler).info("开始更新/下载上升榜")
														
 
															-        XiaoniangaoHour.update_videoList(log_type=log_type,
														
 
															-                                         crawler=crawler,
														
 
															-                                         strategy="小时榜爬虫策略",
														
 
															-                                         oss_endpoint=oss_endpoint,
														
 
															-                                         env=env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         env=args.env)
														
--- a/xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour_scheduling.py
+++ b/xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour_scheduling.py
@@ -1,74 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/3/15
														
 
															-import argparse
														
 
															-import datetime
														
 
															-import os
														
 
															-import random
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from common.common import Common
														
 
															-from common.public import task_fun
														
 
															-from xiaoniangao.xiaoniangao_hour.xiaoniangao_hour_scheduling import XiaoniangaoHourScheduling
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, task, env):
														
 
															-    task_dict = task_fun(task)['task_dict']
														
 
															-    rule_dict = task_fun(task)['rule_dict']
														
 
															-    task_id = task_dict['task_id']
														
 
															-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
														
 
															-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
														
 
															-    our_uid_list = []
														
 
															-    for user in user_list:
														
 
															-        our_uid_list.append(user["uid"])
														
 
															-    our_uid = random.choice(our_uid_list)
														
 
															-    Common.logger(log_type, crawler).info(f"调度任务:\n{task_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f"抓取规则:\n{rule_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["task_name"]}\n')
														
 
															-    # 获取符合规则的视频，写入小时级数据_feeds
														
 
															-    for i in range(1, 101):
														
 
															-        try:
														
 
															-            Common.logger(log_type, crawler).info(f"正在抓取第{i}页")
														
 
															-            XiaoniangaoHourScheduling.get_videoList(log_type, crawler, rule_dict, env)
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).info(f"抓取第{i}页时异常:{e}\n")
														
 
															-    now = datetime.datetime.now()
														
 
															-    if now.hour == 10 and 0 <= now.minute <= 10:
														
 
															-        Common.logger(log_type, crawler).info("开始更新/下载上升榜")
														
 
															-        XiaoniangaoHourScheduling.update_videoList(log_type=log_type,
														
 
															-                                                   crawler=crawler,
														
 
															-                                                   rule_dict=rule_dict,
														
 
															-                                                   our_uid=our_uid,
														
 
															-                                                   env=env)
														
 
															-
														
 
															-    elif now.hour == 15 and now.minute <= 10:
														
 
															-        Common.logger(log_type, crawler).info("开始更新/下载上升榜")
														
 
															-        XiaoniangaoHourScheduling.update_videoList(log_type=log_type,
														
 
															-                                                   crawler=crawler,
														
 
															-                                                   rule_dict=rule_dict,
														
 
															-                                                   our_uid=our_uid,
														
 
															-                                                   env=env)
														
 
															-
														
 
															-    elif now.hour == 20 and now.minute <= 10:
														
 
															-        Common.logger(log_type, crawler).info("开始更新/下载上升榜")
														
 
															-        XiaoniangaoHourScheduling.update_videoList(log_type=log_type,
														
 
															-                                                   crawler=crawler,
														
 
															-                                                   rule_dict=rule_dict,
														
 
															-                                                   our_uid=our_uid,
														
 
															-                                                   env=env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info("抓取完一轮\n")
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--task')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         task=args.task,
														
 
															-         env=args.env)
														
--- a/xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py
+++ b/xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py
@@ -1,36 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/3/16
														
 
															-import argparse
														
 
															-import os
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from xiaoniangao.xiaoniangao_play.xiaoniangao_play import XiaoniangaoPlay
														
 
															-
														
 
															-
														
 
															-class Main:
														
 
															-    @classmethod
														
 
															-    def main(cls, log_type, crawler, env):
														
 
															-        if env == "dev":
														
 
															-            oss_endpoint = "out"
														
 
															-        else:
														
 
															-            oss_endpoint = "inner"
														
 
															-        for i in range(100):
														
 
															-            Common.logger(log_type, crawler).info(f'正在抓取小年糕播放量榜,第{i+1}页\n')
														
 
															-            XiaoniangaoPlay.get_videoList(log_type=log_type,
														
 
															-                                          crawler=crawler,
														
 
															-                                          strategy="播放量榜爬虫策略",
														
 
															-                                          oss_endpoint=oss_endpoint,
														
 
															-                                          env=env)
														
 
															-        Common.del_logs(log_type, crawler)
														
 
															-        Common.logger(log_type, crawler).info('抓取完一轮\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == '__main__':
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    Main.main(log_type=args.log_type, crawler=args.crawler, env=args.env)
														
--- a/xiaoniangao/xiaoniangao_main/run_xiaoniangao_play_scheduling.py
+++ b/xiaoniangao/xiaoniangao_main/run_xiaoniangao_play_scheduling.py
@@ -1,48 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/4/21
														
 
															-import argparse
														
 
															-import os
														
 
															-import random
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.public import task_fun
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from xiaoniangao.xiaoniangao_play.xiaoniangao_play_scheduling import XiaoniangaoplayScheduling
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, task, env):
														
 
															-    task_dict = task_fun(task)['task_dict']
														
 
															-    rule_dict = task_fun(task)['rule_dict']
														
 
															-    task_id = task_dict['task_id']
														
 
															-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
														
 
															-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
														
 
															-    our_uid_list = []
														
 
															-    for user in user_list:
														
 
															-        our_uid_list.append(user["uid"])
														
 
															-    our_uid = random.choice(our_uid_list)
														
 
															-    Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
														
 
															-    # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
														
 
															-    Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["task_name"]}\n')
														
 
															-    XiaoniangaoplayScheduling.get_videoList(log_type=log_type,
														
 
															-                                            crawler=crawler,
														
 
															-                                            rule_dict=rule_dict,
														
 
															-                                            our_uid=our_uid,
														
 
															-                                            env=env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取任务结束\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', default='recommend')  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler', default='kuaishou')  ## 添加参数
														
 
															-    parser.add_argument('--task')  ## 添加参数
														
 
															-    parser.add_argument('--env', default='prod')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         task=args.task,
														
 
															-         env=args.env)
														
--- a/xiaoniangao/xiaoniangao_play/xiaoniangao_play.py
+++ b/xiaoniangao/xiaoniangao_play/xiaoniangao_play.py
@@ -1,430 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/3/16
														
 
															-import json
														
 
															-import os
														
 
															-import random
														
 
															-import shutil
														
 
															-import sys
														
 
															-import time
														
 
															-import requests
														
 
															-import urllib3
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-from common.publish import Publish
														
 
															-from common.public import get_config_from_mysql
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-proxies = {"http": None, "https": None}
														
 
															-
														
 
															-
														
 
															-class XiaoniangaoPlay:
														
 
															-    platform = "小年糕"
														
 
															-
														
 
															-    words = "abcdefghijklmnopqrstuvwxyz0123456789"
														
 
															-    uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
														
 
															-    token = "".join(random.sample(words, 32))
														
 
															-    uid_token_dict = {
														
 
															-        "uid": uid,
														
 
															-        "token": token
														
 
															-    }
														
 
															-
														
 
															-    # 生成 uid、token
														
 
															-    @classmethod
														
 
															-    def get_uid_token(cls):
														
 
															-        words = "abcdefghijklmnopqrstuvwxyz0123456789"
														
 
															-        uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
														
 
															-        token = "".join(random.sample(words, 32))
														
 
															-        uid_token_dict = {
														
 
															-            "uid": uid,
														
 
															-            "token": token
														
 
															-        }
														
 
															-        return uid_token_dict
														
 
															-
														
 
															-    # 基础门槛规则
														
 
															-    @classmethod
														
 
															-    def download_rule(cls, video_dict):
														
 
															-        """
														
 
															-        下载视频的基本规则
														
 
															-        :param video_dict: 视频信息，字典格式
														
 
															-        :return: 满足规则，返回 True；反之，返回 False
														
 
															-        """
														
 
															-        # 视频时长
														
 
															-        if int(float(video_dict['duration'])) >= 40:
														
 
															-            # 宽或高
														
 
															-            if int(video_dict['video_width']) >= 0 or int(video_dict['video_height']) >= 0:
														
 
															-                # 播放量
														
 
															-                if int(video_dict['play_cnt']) >= 20000:
														
 
															-                    # 点赞量
														
 
															-                    if int(video_dict['like_cnt']) >= 0:
														
 
															-                        # 分享量
														
 
															-                        if int(video_dict['share_cnt']) >= 0:
														
 
															-                            # 发布时间 <= 60 天
														
 
															-                            if int(time.time()) - int(video_dict['publish_time_stamp']) <= 3600 * 24 * 60:
														
 
															-                                return True
														
 
															-                            else:
														
 
															-                                return False
														
 
															-                        else:
														
 
															-                            return False
														
 
															-                    else:
														
 
															-                        return False
														
 
															-                else:
														
 
															-                    return False
														
 
															-            return False
														
 
															-        return False
														
 
															-
														
 
															-    # 获取表情及符号
														
 
															-    @classmethod
														
 
															-    def get_expression(cls):
														
 
															-        # 表情列表
														
 
															-        expression_list = ['📍', '⭕️', '🔥', '📣', '🎈', '⚡', '🔔', '🚩', '💢', '💎', '👉', '💓', '❗️', '🔴', '🔺', '♦️', '♥️', '👉', '👈', '🏆', '❤️\u200d🔥']
														
 
															-        # 符号列表
														
 
															-        char_list = ['...', '~~']
														
 
															-        return expression_list, char_list
														
 
															-
														
 
															-    # 获取列表
														
 
															-    @classmethod
														
 
															-    def get_videoList(cls, log_type, crawler, strategy, oss_endpoint, env):
														
 
															-        uid_token_dict = cls.uid_token_dict
														
 
															-        url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
														
 
															-        headers = {
														
 
															-            "x-b3-traceid": '1dc0a6d0929a2b',
														
 
															-            "X-Token-Id": 'ae99a4953804085ebb0ae36fa138031d-1146052582',
														
 
															-            "uid": uid_token_dict['uid'],
														
 
															-            "content-type": "application/json",
														
 
															-            "Accept-Encoding": "gzip,compress,br,deflate",
														
 
															-            "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
														
 
															-                          ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
														
 
															-                          'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
														
 
															-            "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/620/page-frame.html'
														
 
															-        }
														
 
															-        data = {
														
 
															-            "log_params": {
														
 
															-                "page": "discover_rec",
														
 
															-                "common": {
														
 
															-                    "brand": "iPhone",
														
 
															-                    "device": "iPhone 11",
														
 
															-                    "os": "iOS 14.7.1",
														
 
															-                    "weixinver": "8.0.20",
														
 
															-                    "srcver": "2.24.2",
														
 
															-                    "net": "wifi",
														
 
															-                    "scene": 1089
														
 
															-                }
														
 
															-            },
														
 
															-            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
														
 
															-            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
														
 
															-            "share_width": 625,
														
 
															-            "share_height": 500,
														
 
															-            "ext": {
														
 
															-                "fmid": 0,
														
 
															-                "items": {}
														
 
															-            },
														
 
															-            "app": "xng",
														
 
															-            "rec_scene": "discover_rec",
														
 
															-            "log_common_params": {
														
 
															-                "e": [{
														
 
															-                    "data": {
														
 
															-                        "page": "discoverIndexPage",
														
 
															-                        "topic": "recommend"
														
 
															-                    },
														
 
															-                    "ab": {}
														
 
															-                }],
														
 
															-                "ext": {
														
 
															-                    "brand": "iPhone",
														
 
															-                    "device": "iPhone 11",
														
 
															-                    "os": "iOS 14.7.1",
														
 
															-                    "weixinver": "8.0.20",
														
 
															-                    "srcver": "2.24.3",
														
 
															-                    "net": "wifi",
														
 
															-                    "scene": "1089"
														
 
															-                },
														
 
															-                "pj": "1",
														
 
															-                "pf": "2",
														
 
															-                "session_id": "7bcce313-b57d-4305-8d14-6ebd9a1bad29"
														
 
															-            },
														
 
															-            "refresh": False,
														
 
															-            "token": uid_token_dict['token'],
														
 
															-            "uid": uid_token_dict['uid'],
														
 
															-            "proj": "ma",
														
 
															-            "wx_ver": "8.0.20",
														
 
															-            "code_ver": "3.62.0"
														
 
															-        }
														
 
															-        urllib3.disable_warnings()
														
 
															-        r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
														
 
															-        if "data" not in r.text or r.status_code != 200:
														
 
															-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}")
														
 
															-            return
														
 
															-        elif "data" not in r.json():
														
 
															-            Common.logger(log_type, crawler).info(f"get_videoList:{r.json()}")
														
 
															-            return
														
 
															-        elif "list" not in r.json()["data"]:
														
 
															-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}")
														
 
															-            return
														
 
															-        elif len(r.json()["data"]["list"]) == 0:
														
 
															-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}")
														
 
															-            return
														
 
															-        else:
														
 
															-            # 视频列表数据
														
 
															-            feeds = r.json()["data"]["list"]
														
 
															-            for i in range(len(feeds)):
														
 
															-                # 标题，表情随机加在片头、片尾，或替代句子中间的标点符号
														
 
															-                if "title" in feeds[i]:
														
 
															-                    befor_video_title = feeds[i]["title"].strip().replace("\n", "") \
														
 
															-                        .replace("/", "").replace("\r", "").replace("#", "") \
														
 
															-                        .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
														
 
															-                        .replace(":", "").replace("*", "").replace("？", "") \
														
 
															-                        .replace("?", "").replace('"', "").replace("<", "") \
														
 
															-                        .replace(">", "").replace("|", "").replace(" ", "").replace("#表情", "").replace("#符号", "").replace('"' ,'').replace("'", '')
														
 
															-
														
 
															-                    expression = cls.get_expression()
														
 
															-                    expression_list = expression[0]
														
 
															-                    char_list = expression[1]
														
 
															-                    # 随机取一个表情
														
 
															-                    expression = random.choice(expression_list)
														
 
															-                    # 生成标题list[表情+title, title+表情]
														
 
															-                    expression_title_list = [expression + befor_video_title, befor_video_title + expression]
														
 
															-                    # 从标题list中随机取一个标题
														
 
															-                    title_list1 = random.choice(expression_title_list)
														
 
															-                    # 生成标题:原标题+符号
														
 
															-                    title_list2 = befor_video_title + random.choice(char_list)
														
 
															-                    # 表情和标题组合，与标题和符号组合，汇总成待使用的标题列表
														
 
															-                    title_list4 = [title_list2, title_list1]
														
 
															-                    # 最终标题
														
 
															-                    video_title = random.choice(title_list4)
														
 
															-                else:
														
 
															-                    video_title = 0
														
 
															-
														
 
															-                # 视频 ID
														
 
															-                if "vid" in feeds[i]:
														
 
															-                    video_id = feeds[i]["vid"]
														
 
															-                else:
														
 
															-                    video_id = 0
														
 
															-
														
 
															-                # 播放量
														
 
															-                if "play_pv" in feeds[i]:
														
 
															-                    video_play_cnt = feeds[i]["play_pv"]
														
 
															-                else:
														
 
															-                    video_play_cnt = 0
														
 
															-
														
 
															-                # 评论量
														
 
															-                if "comment_count" in feeds[i]:
														
 
															-                    video_comment_cnt = feeds[i]["comment_count"]
														
 
															-                else:
														
 
															-                    video_comment_cnt = 0
														
 
															-
														
 
															-                # 点赞量
														
 
															-                if "favor" in feeds[i]:
														
 
															-                    video_like_cnt = feeds[i]["favor"]["total"]
														
 
															-                else:
														
 
															-                    video_like_cnt = 0
														
 
															-
														
 
															-                # 分享量
														
 
															-                if "share" in feeds[i]:
														
 
															-                    video_share_cnt = feeds[i]["share"]
														
 
															-                else:
														
 
															-                    video_share_cnt = 0
														
 
															-
														
 
															-                # 时长
														
 
															-                if "du" in feeds[i]:
														
 
															-                    video_duration = int(feeds[i]["du"] / 1000)
														
 
															-                else:
														
 
															-                    video_duration = 0
														
 
															-
														
 
															-                # 宽和高
														
 
															-                if "w" or "h" in feeds[i]:
														
 
															-                    video_width = feeds[i]["w"]
														
 
															-                    video_height = feeds[i]["h"]
														
 
															-                else:
														
 
															-                    video_width = 0
														
 
															-                    video_height = 0
														
 
															-
														
 
															-                # 发布时间
														
 
															-                if "t" in feeds[i]:
														
 
															-                    video_send_time = feeds[i]["t"]
														
 
															-                else:
														
 
															-                    video_send_time = 0
														
 
															-                publish_time_stamp = int(int(video_send_time)/1000)
														
 
															-                publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
														
 
															-
														
 
															-                # 用户名 / 头像
														
 
															-                if "user" in feeds[i]:
														
 
															-                    user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
														
 
															-                        .replace("/", "").replace("快手", "").replace(" ", "") \
														
 
															-                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
														
 
															-                    head_url = feeds[i]["user"]["hurl"]
														
 
															-                else:
														
 
															-                    user_name = 0
														
 
															-                    head_url = 0
														
 
															-
														
 
															-                # 用户 ID
														
 
															-                profile_id = feeds[i]["id"]
														
 
															-
														
 
															-                # 用户 mid
														
 
															-                profile_mid = feeds[i]["user"]["mid"]
														
 
															-
														
 
															-                # 视频封面
														
 
															-                if "url" in feeds[i]:
														
 
															-                    cover_url = feeds[i]["url"]
														
 
															-                else:
														
 
															-                    cover_url = 0
														
 
															-
														
 
															-                # 视频播放地址
														
 
															-                if "v_url" in feeds[i]:
														
 
															-                    video_url = feeds[i]["v_url"]
														
 
															-                else:
														
 
															-                    video_url = 0
														
 
															-
														
 
															-                video_dict = {
														
 
															-                    "video_title": video_title,
														
 
															-                    "video_id": video_id,
														
 
															-                    "duration": video_duration,
														
 
															-                    "play_cnt": video_play_cnt,
														
 
															-                    "like_cnt": video_like_cnt,
														
 
															-                    "comment_cnt": video_comment_cnt,
														
 
															-                    "share_cnt": video_share_cnt,
														
 
															-                    "user_name": user_name,
														
 
															-                    "publish_time_stamp": publish_time_stamp,
														
 
															-                    "publish_time_str": publish_time_str,
														
 
															-                    "video_width": video_width,
														
 
															-                    "video_height": video_height,
														
 
															-                    "avatar_url": head_url,
														
 
															-                    "profile_id": profile_id,
														
 
															-                    "profile_mid": profile_mid,
														
 
															-                    "cover_url": cover_url,
														
 
															-                    "video_url": video_url,
														
 
															-                    "session": f"xiaoniangao-play-{int(time.time())}"
														
 
															-
														
 
															-                }
														
 
															-                for k, v in video_dict.items():
														
 
															-                    Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-
														
 
															-                cls.download_publish(log_type=log_type,
														
 
															-                                     crawler=crawler,
														
 
															-                                     video_dict=video_dict,
														
 
															-                                     strategy=strategy,
														
 
															-                                     oss_endpoint=oss_endpoint,
														
 
															-                                     env=env)
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_video(cls, log_type, crawler, video_id, env):
														
 
															-        sql = f""" select * from crawler_video where platform="小年糕" and out_video_id="{video_id}"; """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    @classmethod
														
 
															-    def download_publish(cls, log_type, crawler, video_dict, strategy, oss_endpoint, env):
														
 
															-        # 过滤无效视频
														
 
															-        if video_dict["video_id"] == 0 \
														
 
															-                or video_dict["video_url"] == 0\
														
 
															-                or video_dict["cover_url"] == 0:
														
 
															-            Common.logger(log_type, crawler).warning("无效视频\n")
														
 
															-        # 抓取规则
														
 
															-        elif cls.download_rule(video_dict) is False:
														
 
															-            Common.logger(log_type, crawler).info("不满足抓取规则\n")
														
 
															-        # 去重
														
 
															-        elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
														
 
															-            Common.logger(log_type, crawler).info("视频已下载\n")
														
 
															-        elif any(str(word) if str(word) in video_dict['video_title'] else False for word in
														
 
															-                 get_config_from_mysql(log_type=log_type,
														
 
															-                                       source=crawler,
														
 
															-                                       env=env,
														
 
															-                                       text="filter",
														
 
															-                                       action="")) is True:
														
 
															-            Common.logger(log_type, crawler).info("视频已中过滤词\n")
														
 
															-        else:
														
 
															-            # 下载封面
														
 
															-            Common.download_method(log_type=log_type, crawler=crawler, text="cover", title=video_dict["video_title"], url=video_dict["cover_url"])
														
 
															-            # 下载视频
														
 
															-            Common.download_method(log_type=log_type, crawler=crawler, text="video", title=video_dict["video_title"], url=video_dict["video_url"])
														
 
															-            # 保存视频信息至 "./videos/{download_video_title}/info.txt"
														
 
															-            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
														
 
															-
														
 
															-            # 上传视频
														
 
															-            Common.logger(log_type, crawler).info("开始上传视频...")
														
 
															-            our_video_id = Publish.upload_and_publish(log_type=log_type,
														
 
															-                                                      crawler=crawler,
														
 
															-                                                      strategy=strategy,
														
 
															-                                                      our_uid="play",
														
 
															-                                                      env=env,
														
 
															-                                                      oss_endpoint=oss_endpoint)
														
 
															-            if env == "dev":
														
 
															-                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-            else:
														
 
															-                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-            Common.logger(log_type, crawler).info("视频上传完成")
														
 
															-
														
 
															-            if our_video_id is None:
														
 
															-                # 删除视频文件夹
														
 
															-                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-                return
														
 
															-
														
 
															-            # 视频信息保存数据库
														
 
															-            rule_dict = {
														
 
															-                "duration": {"min": 40},
														
 
															-                "play_cnt": {"min": 80000},
														
 
															-                "min_publish_day": {"min": 60}
														
 
															-            }
														
 
															-
														
 
															-            insert_sql = f""" insert into crawler_video(video_id,
														
 
															-                                                        out_user_id,
														
 
															-                                                        platform,
														
 
															-                                                        strategy,
														
 
															-                                                        out_video_id,
														
 
															-                                                        video_title,
														
 
															-                                                        cover_url,
														
 
															-                                                        video_url,
														
 
															-                                                        duration,
														
 
															-                                                        publish_time,
														
 
															-                                                        play_cnt,
														
 
															-                                                        crawler_rule,
														
 
															-                                                        width,
														
 
															-                                                        height)
														
 
															-                                                        values({our_video_id},
														
 
															-                                                        "{video_dict['profile_id']}",
														
 
															-                                                        "{cls.platform}",
														
 
															-                                                        "播放量榜爬虫策略",
														
 
															-                                                        "{video_dict['video_id']}",
														
 
															-                                                        "{video_dict['video_title']}",
														
 
															-                                                        "{video_dict['cover_url']}",
														
 
															-                                                        "{video_dict['video_url']}",
														
 
															-                                                        {int(video_dict['duration'])},
														
 
															-                                                        "{video_dict['publish_time_str']}",
														
 
															-                                                        {int(video_dict['play_cnt'])},
														
 
															-                                                        '{json.dumps(rule_dict)}',
														
 
															-                                                        {int(video_dict['video_width'])},
														
 
															-                                                        {int(video_dict['video_height'])}) """
														
 
															-            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-            MysqlHelper.update_values(log_type, crawler, insert_sql, env)
														
 
															-            Common.logger(log_type, crawler).info('视频信息插入数据库成功！')
														
 
															-
														
 
															-            # 视频写入飞书
														
 
															-            Feishu.insert_columns(log_type, crawler, "c85k1C", "ROWS", 1, 2)
														
 
															-            # 视频ID工作表，首行写入数据
														
 
															-            upload_time = int(time.time())
														
 
															-            values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
														
 
															-                       "播放量榜爬虫策略",
														
 
															-                       str(video_dict['video_id']),
														
 
															-                       str(video_dict['video_title']),
														
 
															-                       our_video_link,
														
 
															-                       video_dict['play_cnt'],
														
 
															-                       video_dict['comment_cnt'],
														
 
															-                       video_dict['like_cnt'],
														
 
															-                       video_dict['share_cnt'],
														
 
															-                       video_dict['duration'],
														
 
															-                       f"{video_dict['video_width']}*{video_dict['video_height']}",
														
 
															-                       str(video_dict['publish_time_str']),
														
 
															-                       str(video_dict['user_name']),
														
 
															-                       str(video_dict['profile_id']),
														
 
															-                       str(video_dict['profile_mid']),
														
 
															-                       str(video_dict['avatar_url']),
														
 
															-                       str(video_dict['cover_url']),
														
 
															-                       str(video_dict['video_url'])]]
														
 
															-            time.sleep(1)
														
 
															-            Feishu.update_values(log_type, crawler, "c85k1C", "F2:Z2", values)
														
 
															-            Common.logger(log_type, crawler).info('视频信息写入飞书成功\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == '__main__':
														
 
															-    XiaoniangaoPlay.get_videoList("play", "xiaoniangao", "播放量榜爬虫策略", "out", "dev")
														
 
															-
														
 
															-    pass
														
--- a/xigua/xigua_follow/__init__.py
+++ b/xigua/xigua_follow/__init__.py
@@ -1,3 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/17
														
--- a/xigua/xigua_follow/xigua_follow.py
+++ b/xigua/xigua_follow/xigua_follow.py
@@ -1,1039 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/17
														
 
															-import base64
														
 
															-import json
														
 
															-import os
														
 
															-import random
														
 
															-import shutil
														
 
															-import string
														
 
															-import sys
														
 
															-import time
														
 
															-from hashlib import md5
														
 
															-
														
 
															-import requests
														
 
															-import urllib3
														
 
															-from requests.adapters import HTTPAdapter
														
 
															-
														
 
															-# from selenium.webdriver import DesiredCapabilities
														
 
															-# from selenium.webdriver.chrome.service import Service
														
 
															-# from selenium.webdriver.common.by import By
														
 
															-# from selenium import webdriver
														
 
															-from lxml import etree
														
 
															-
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.db import MysqlHelper
														
 
															-from common.getuser import getUser
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-from common.publish import Publish
														
 
															-from common.public import get_user_from_mysql, random_title, get_config_from_mysql
														
 
															-
														
 
															-
														
 
															-class Follow:
														
 
															-    # 个人主页视频翻页参数
														
 
															-    offset = 0
														
 
															-
														
 
															-    platform = "西瓜视频"
														
 
															-    tag = "西瓜视频爬虫,定向爬虫策略"
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_rule(cls, log_type, crawler):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                rule_sheet = Feishu.get_values_batch(log_type, crawler, "4kxd31")
														
 
															-                if rule_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning("rule_sheet is None! 10秒后重新获取")
														
 
															-                    time.sleep(10)
														
 
															-                    continue
														
 
															-                rule_dict = {
														
 
															-                    "play_cnt": int(rule_sheet[1][2]),
														
 
															-                    "comment_cnt": int(rule_sheet[2][2]),
														
 
															-                    "like_cnt": int(rule_sheet[3][2]),
														
 
															-                    "duration": int(rule_sheet[4][2]),
														
 
															-                    "publish_time": int(rule_sheet[5][2]),
														
 
															-                    "video_width": int(rule_sheet[6][2]),
														
 
															-                    "video_height": int(rule_sheet[7][2]),
														
 
															-                }
														
 
															-                return rule_dict
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_rule:{e}\n")
														
 
															-
														
 
															-    # 下载规则
														
 
															-    @classmethod
														
 
															-    def download_rule(cls, video_info_dict, rule_dict):
														
 
															-        if video_info_dict['play_cnt'] >= rule_dict['play_cnt']:
														
 
															-            if video_info_dict['comment_cnt'] >= rule_dict['comment_cnt']:
														
 
															-                if video_info_dict['like_cnt'] >= rule_dict['like_cnt']:
														
 
															-                    if video_info_dict['duration'] >= rule_dict['duration']:
														
 
															-                        if video_info_dict['video_width'] >= rule_dict['video_width'] \
														
 
															-                                or video_info_dict['video_height'] >= rule_dict['video_height']:
														
 
															-                            return True
														
 
															-                        else:
														
 
															-                            return False
														
 
															-                    else:
														
 
															-                        return False
														
 
															-                else:
														
 
															-                    return False
														
 
															-            else:
														
 
															-                return False
														
 
															-        else:
														
 
															-            return False
														
 
															-
														
 
															-    # 过滤词库
														
 
															-    @classmethod
														
 
															-    def filter_words(cls, log_type, crawler):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'KGB4Hc')
														
 
															-                if filter_words_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
														
 
															-                    continue
														
 
															-                filter_words_list = []
														
 
															-                for x in filter_words_sheet:
														
 
															-                    for y in x:
														
 
															-                        if y is None:
														
 
															-                            pass
														
 
															-                        else:
														
 
															-                            filter_words_list.append(y)
														
 
															-                return filter_words_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_out_user_info(cls, log_type, crawler, out_uid):
														
 
															-        try:
														
 
															-            headers = {
														
 
															-                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
														
 
															-                'referer': f'https://www.ixigua.com/home/{out_uid}',
														
 
															-                'Cookie': f'ixigua-a-s=1; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; __ac_signature={cls.random_signature()}; MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; s_v_web_id=verify_lef4i99x_32SosrdH_Qrtk_4LJn_8S7q_fhu16xe3s8ZV; tt_scid=QLJjPuHf6wxVqu6IIq6gHiJXQpVrCwrdhjH2zpm7-E3ZniE1RXBcP6M8b41FJOdo41e1; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1677047013%7C5866a444e5ae10a9df8c11551db75010fb77b657f214ccf84e503fae8d313d09; msToken=PerXJcDdIsZ6zXkGITsftXX4mDaVaW21GuqtzSVdctH46oXXT2GcELIs9f0XW2hunRzP6KVHLZaYElRvNYflLKUXih7lC27XKxs3HjdZiXPK9NQaoKbLfA==; ixigua-a-s=1', }
														
 
															-            url = f"https://www.ixigua.com/home/{out_uid}"
														
 
															-            urllib3.disable_warnings()
														
 
															-            s = requests.session()
														
 
															-            # max_retries=3 重试3次
														
 
															-            s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-            s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-            response = s.get(url=url, headers=headers, proxies=Common.tunnel_proxies(), verify=False, timeout=5).text
														
 
															-            html = etree.HTML(response)
														
 
															-            out_follow_str = html.xpath('//div[@class="userDetailV3__header__detail2"]/*[1]/span')[0].text.encode(
														
 
															-                'raw_unicode_escape').decode()
														
 
															-            out_fans_str = html.xpath('//div[@class="userDetailV3__header__detail2"]/*[2]/span')[0].text.encode(
														
 
															-                'raw_unicode_escape').decode()
														
 
															-            out_like_str = html.xpath('//div[@class="userDetailV3__header__detail2"]/*[3]/span')[0].text.encode(
														
 
															-                'raw_unicode_escape').decode()
														
 
															-            out_avatar_url = f"""https:{html.xpath('//span[@class="component-avatar__inner"]//img/@src')[0]}"""
														
 
															-            if "万" in out_follow_str:
														
 
															-                out_follow = int(float(out_follow_str.split("万")[0]) * 10000)
														
 
															-            else:
														
 
															-                out_follow = int(out_follow_str.replace(",", ""))
														
 
															-            if "万" in out_fans_str:
														
 
															-                out_fans = int(float(out_fans_str.split("万")[0]) * 10000)
														
 
															-            else:
														
 
															-                out_fans = int(out_fans_str.replace(",", ""))
														
 
															-            if "万" in out_like_str:
														
 
															-                out_like = int(float(out_like_str.split("万")[0]) * 10000)
														
 
															-            else:
														
 
															-                out_like = int(out_like_str.replace(",", ""))
														
 
															-            out_user_dict = {
														
 
															-                "out_follow": out_follow,
														
 
															-                "out_fans": out_fans,
														
 
															-                "out_like": out_like,
														
 
															-                "out_avatar_url": out_avatar_url,
														
 
															-            }
														
 
															-            # for k, v in out_user_dict.items():
														
 
															-            #     print(f"{k}:{v}")
														
 
															-            return out_user_dict
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_out_user_info:{e}\n")
														
 
															-
														
 
															-    # 获取用户信息（字典格式）. 注意：部分 user_id 字符类型是 int / str
														
 
															-    @classmethod
														
 
															-    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
														
 
															-                if user_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
														
 
															-                    continue
														
 
															-                our_user_list = []
														
 
															-                for i in range(1, len(user_sheet)):
														
 
															-                    # for i in range(428, len(user_sheet)):
														
 
															-                    out_uid = user_sheet[i][2]
														
 
															-                    user_name = user_sheet[i][3]
														
 
															-                    our_uid = user_sheet[i][6]
														
 
															-                    our_user_link = user_sheet[i][7]
														
 
															-                    if out_uid is None or user_name is None:
														
 
															-                        Common.logger(log_type, crawler).info("空行\n")
														
 
															-                    else:
														
 
															-                        Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
														
 
															-                        if our_uid is None:
														
 
															-                            try:
														
 
															-                                out_user_info = cls.get_out_user_info(log_type, crawler, out_uid)
														
 
															-                            except Exception as e:
														
 
															-                                continue
														
 
															-                            out_user_dict = {
														
 
															-                                "out_uid": out_uid,
														
 
															-                                "user_name": user_name,
														
 
															-                                "out_avatar_url": out_user_info["out_avatar_url"],
														
 
															-                                "out_create_time": '',
														
 
															-                                "out_tag": '',
														
 
															-                                "out_play_cnt": 0,
														
 
															-                                "out_fans": out_user_info["out_fans"],
														
 
															-                                "out_follow": out_user_info["out_follow"],
														
 
															-                                "out_friend": 0,
														
 
															-                                "out_like": out_user_info["out_like"],
														
 
															-                                "platform": cls.platform,
														
 
															-                                "tag": cls.tag,
														
 
															-                            }
														
 
															-                            our_user_dict = getUser.create_user(log_type=log_type, crawler=crawler,
														
 
															-                                                                out_user_dict=out_user_dict, env=env, machine=machine)
														
 
															-                            our_uid = our_user_dict['our_uid']
														
 
															-                            our_user_link = our_user_dict['our_user_link']
														
 
															-                            Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
														
 
															-                                                 [[our_uid, our_user_link]])
														
 
															-                            Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功！\n')
														
 
															-                            our_user_list.append(our_user_dict)
														
 
															-                        else:
														
 
															-                            our_user_dict = {
														
 
															-                                'out_uid': out_uid,
														
 
															-                                'user_name': user_name,
														
 
															-                                'our_uid': our_uid,
														
 
															-                                'our_user_link': our_user_link,
														
 
															-                            }
														
 
															-                            our_user_list.append(our_user_dict)
														
 
															-                return our_user_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'get_user_id_from_feishu异常:{e}\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def random_signature(cls):
														
 
															-        src_digits = string.digits  # string_数字
														
 
															-        src_uppercase = string.ascii_uppercase  # string_大写字母
														
 
															-        src_lowercase = string.ascii_lowercase  # string_小写字母
														
 
															-        digits_num = random.randint(1, 6)
														
 
															-        uppercase_num = random.randint(1, 26 - digits_num - 1)
														
 
															-        lowercase_num = 26 - (digits_num + uppercase_num)
														
 
															-        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + random.sample(
														
 
															-            src_lowercase, lowercase_num)
														
 
															-        random.shuffle(password)
														
 
															-        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
														
 
															-        new_password_start = new_password[0:18]
														
 
															-        new_password_end = new_password[-7:]
														
 
															-        if new_password[18] == '8':
														
 
															-            new_password = new_password_start + 'w' + new_password_end
														
 
															-        elif new_password[18] == '9':
														
 
															-            new_password = new_password_start + 'x' + new_password_end
														
 
															-        elif new_password[18] == '-':
														
 
															-            new_password = new_password_start + 'y' + new_password_end
														
 
															-        elif new_password[18] == '.':
														
 
															-            new_password = new_password_start + 'z' + new_password_end
														
 
															-        else:
														
 
															-            new_password = new_password_start + 'y' + new_password_end
														
 
															-        return new_password
														
 
															-
														
 
															-    # @classmethod
														
 
															-    # def get_signature(cls, log_type, crawler, out_uid, machine):
														
 
															-    #     try:
														
 
															-    #         # 打印请求配置
														
 
															-    #         ca = DesiredCapabilities.CHROME
														
 
															-    #         ca["goog:loggingPrefs"] = {"performance": "ALL"}
														
 
															-    #
														
 
															-    #         # 不打开浏览器运行
														
 
															-    #         chrome_options = webdriver.ChromeOptions()
														
 
															-    #         chrome_options.add_argument("--headless")
														
 
															-    #         chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
														
 
															-    #         chrome_options.add_argument("--no-sandbox")
														
 
															-    #
														
 
															-    #         # driver初始化
														
 
															-    #         if machine == 'aliyun' or machine == 'aliyun_hk':
														
 
															-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
														
 
															-    #         elif machine == 'macpro':
														
 
															-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
														
 
															-    #                                       service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
														
 
															-    #         elif machine == 'macair':
														
 
															-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
														
 
															-    #                                       service=Service('/Users/piaoquan/Downloads/chromedriver'))
														
 
															-    #         else:
														
 
															-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
														
 
															-    #         driver.implicitly_wait(10)
														
 
															-    #         driver.get(f'https://www.ixigua.com/home/{out_uid}/')
														
 
															-    #         time.sleep(3)
														
 
															-    #         data_src = driver.find_elements(By.XPATH, '//img[@class="tt-img BU-MagicImage tt-img-loaded"]')[1].get_attribute("data-src")
														
 
															-    #         signature = data_src.split("x-signature=")[-1]
														
 
															-    #         return signature
														
 
															-    #     except Exception as e:
														
 
															-    #         Common.logger(log_type, crawler).error(f'get_signature异常:{e}\n')
														
 
															-
														
 
															-    # 获取视频详情
														
 
															-    @classmethod
														
 
															-    def get_video_url(cls, log_type, crawler, gid):
														
 
															-        try:
														
 
															-            url = 'https://www.ixigua.com/api/mixVideo/information?'
														
 
															-            headers = {
														
 
															-                "accept-encoding": "gzip, deflate",
														
 
															-                "accept-language": "zh-CN,zh-Hans;q=0.9",
														
 
															-                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
														
 
															-                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
														
 
															-                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
														
 
															-            }
														
 
															-            params = {
														
 
															-                'mixId': gid,
														
 
															-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
														
 
															-                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
														
 
															-                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
														
 
															-                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
														
 
															-                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
														
 
															-            }
														
 
															-            cookies = {
														
 
															-                'ixigua-a-s': '1',
														
 
															-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
														
 
															-                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
														
 
															-                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
														
 
															-                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
														
 
															-                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
														
 
															-                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
														
 
															-                '__ac_nonce': '06304878000964fdad287',
														
 
															-                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
														
 
															-                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
														
 
															-                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
														
 
															-                '_tea_utm_cache_1300': 'undefined',
														
 
															-                'support_avif': 'false',
														
 
															-                'support_webp': 'false',
														
 
															-                'xiguavideopcwebid': '7134967546256016900',
														
 
															-                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
														
 
															-            }
														
 
															-            urllib3.disable_warnings()
														
 
															-            s = requests.session()
														
 
															-            # max_retries=3 重试3次
														
 
															-            s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-            s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-            response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False,
														
 
															-                             proxies=Common.tunnel_proxies(), timeout=5)
														
 
															-            response.close()
														
 
															-            if 'data' not in response.json() or response.json()['data'] == '':
														
 
															-                Common.logger(log_type, crawler).warning('get_video_info: response: {}', response)
														
 
															-            else:
														
 
															-                video_info = response.json()['data']['gidInformation']['packerData']['video']
														
 
															-                video_url_dict = {}
														
 
															-                # video_url
														
 
															-                if 'videoResource' not in video_info:
														
 
															-                    video_url_dict["video_url"] = ''
														
 
															-                    video_url_dict["audio_url"] = ''
														
 
															-                    video_url_dict["video_width"] = 0
														
 
															-                    video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'dash_120fps' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['dash_120fps'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
														
 
															-                            and len(
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                                'backup_url_1']
														
 
															-                        audio_url = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                                'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                                'vwidth']
														
 
															-                        video_height = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                                'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'dash' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['dash'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_4']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_3']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_2']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_1']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['dash'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['dash']['dynamic_video'] \
														
 
															-                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'normal' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['normal'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_4']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_3']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_2']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_1']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['normal'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['normal']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['normal']['dynamic_video'] \
														
 
															-                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                else:
														
 
															-                    video_url_dict["video_url"] = ''
														
 
															-                    video_url_dict["audio_url"] = ''
														
 
															-                    video_url_dict["video_width"] = 0
														
 
															-                    video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                return video_url_dict
														
 
															-
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'get_video_url:{e}\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_videolist(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine):
														
 
															-        try:
														
 
															-            signature = cls.random_signature()
														
 
															-            while True:
														
 
															-                url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
														
 
															-                params = {
														
 
															-                    'to_user_id': str(out_uid),
														
 
															-                    'offset': str(cls.offset),
														
 
															-                    'limit': '30',
														
 
															-                    'maxBehotTime': '0',
														
 
															-                    'order': 'new',
														
 
															-                    'isHome': '0',
														
 
															-                    # 'msToken': 'G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==',
														
 
															-                    # 'X-Bogus': 'DFSzswVuEkUANjW9ShFTgR/F6qHt',
														
 
															-                    '_signature': signature,
														
 
															-                }
														
 
															-                headers = {
														
 
															-                    'referer': f'https://www.ixigua.com/home/{out_uid}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
														
 
															-                    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
														
 
															-                }
														
 
															-                urllib3.disable_warnings()
														
 
															-                s = requests.session()
														
 
															-                # max_retries=3 重试3次
														
 
															-                s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-                s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-                response = s.get(url=url, headers=headers, params=params, proxies=Common.tunnel_proxies(), verify=False,
														
 
															-                                 timeout=5)
														
 
															-                response.close()
														
 
															-                cls.offset += 30
														
 
															-                if response.status_code != 200:
														
 
															-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
														
 
															-                    cls.offset = 0
														
 
															-                    return
														
 
															-                elif 'data' not in response.text:
														
 
															-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
														
 
															-                    cls.offset = 0
														
 
															-                    return
														
 
															-                elif not response.json()["data"]['videoList']:
														
 
															-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
														
 
															-                    cls.offset = 0
														
 
															-                    return
														
 
															-                else:
														
 
															-                    videoList = response.json()['data']['videoList']
														
 
															-                    for i in range(len(videoList)):
														
 
															-                        # video_title
														
 
															-                        if 'title' not in videoList[i]:
														
 
															-                            video_title = 0
														
 
															-                        else:
														
 
															-                            video_title = videoList[i]['title'].strip().replace('手游', '') \
														
 
															-                                .replace('/', '').replace('\/', '').replace('\n', '').replace('"', '').replace("'", '')
														
 
															-
														
 
															-                        # video_id
														
 
															-                        if 'video_id' not in videoList[i]:
														
 
															-                            video_id = 0
														
 
															-                        else:
														
 
															-                            video_id = videoList[i]['video_id']
														
 
															-
														
 
															-                        # gid
														
 
															-                        if 'gid' not in videoList[i]:
														
 
															-                            gid = 0
														
 
															-                        else:
														
 
															-                            gid = videoList[i]['gid']
														
 
															-
														
 
															-                        # play_cnt
														
 
															-                        if 'video_detail_info' not in videoList[i]:
														
 
															-                            play_cnt = 0
														
 
															-                        elif 'video_watch_count' not in videoList[i]['video_detail_info']:
														
 
															-                            play_cnt = 0
														
 
															-                        else:
														
 
															-                            play_cnt = videoList[i]['video_detail_info']['video_watch_count']
														
 
															-
														
 
															-                        # comment_cnt
														
 
															-                        if 'comment_count' not in videoList[i]:
														
 
															-                            comment_cnt = 0
														
 
															-                        else:
														
 
															-                            comment_cnt = videoList[i]['comment_count']
														
 
															-
														
 
															-                        # like_cnt
														
 
															-                        if 'digg_count' not in videoList[i]:
														
 
															-                            like_cnt = 0
														
 
															-                        else:
														
 
															-                            like_cnt = videoList[i]['digg_count']
														
 
															-
														
 
															-                        # share_cnt
														
 
															-                        share_cnt = 0
														
 
															-
														
 
															-                        # video_duration
														
 
															-                        if 'video_duration' not in videoList[i]:
														
 
															-                            video_duration = 0
														
 
															-                        else:
														
 
															-                            video_duration = int(videoList[i]['video_duration'])
														
 
															-
														
 
															-                        # send_time
														
 
															-                        if 'publish_time' not in videoList[i]:
														
 
															-                            publish_time = 0
														
 
															-                        else:
														
 
															-                            publish_time = videoList[i]['publish_time']
														
 
															-
														
 
															-                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time))
														
 
															-
														
 
															-                        # is_top
														
 
															-                        if 'is_top' not in videoList[i]:
														
 
															-                            is_top = 0
														
 
															-                        else:
														
 
															-                            is_top = videoList[i]['is_top']
														
 
															-
														
 
															-                        # user_name
														
 
															-                        if 'user_info' not in videoList[i]:
														
 
															-                            user_name = 0
														
 
															-                        elif 'name' not in videoList[i]['user_info']:
														
 
															-                            user_name = 0
														
 
															-                        else:
														
 
															-                            user_name = videoList[i]['user_info']['name']
														
 
															-
														
 
															-                        # user_id
														
 
															-                        if 'user_info' not in videoList[i]:
														
 
															-                            user_id = 0
														
 
															-                        elif 'user_id' not in videoList[i]['user_info']:
														
 
															-                            user_id = 0
														
 
															-                        else:
														
 
															-                            user_id = videoList[i]['user_info']['user_id']
														
 
															-
														
 
															-                        # avatar_url
														
 
															-                        if 'user_info' not in videoList[i]:
														
 
															-                            avatar_url = 0
														
 
															-                        elif 'avatar_url' not in videoList[i]['user_info']:
														
 
															-                            avatar_url = 0
														
 
															-                        else:
														
 
															-                            avatar_url = videoList[i]['user_info']['avatar_url']
														
 
															-
														
 
															-                        # cover_url
														
 
															-                        if 'video_detail_info' not in videoList[i]:
														
 
															-                            cover_url = 0
														
 
															-                        elif 'detail_video_large_image' not in videoList[i]['video_detail_info']:
														
 
															-                            cover_url = 0
														
 
															-                        elif 'url' in videoList[i]['video_detail_info']['detail_video_large_image']:
														
 
															-                            cover_url = videoList[i]['video_detail_info']['detail_video_large_image']['url']
														
 
															-                        else:
														
 
															-                            cover_url = videoList[i]['video_detail_info']['detail_video_large_image']['url_list'][0][
														
 
															-                                'url']
														
 
															-
														
 
															-                        Common.logger(log_type, crawler).info(f'---开始读取规则---')
														
 
															-                        rule_dict = cls.get_rule(log_type, crawler)
														
 
															-                        Common.logger(log_type, crawler).info(f'---读取规则完成---')
														
 
															-
														
 
															-                        if gid == 0 or video_id == 0 or cover_url == 0:
														
 
															-                            Common.logger(log_type, crawler).info('无效视频\n')
														
 
															-                        elif is_top is True and int(time.time()) - int(publish_time) > 3600 * 24 * rule_dict[
														
 
															-                            'publish_time']:
														
 
															-                            Common.logger(log_type, crawler).info(
														
 
															-                                f'置顶视频，且发布时间:{publish_time_str} 超过{rule_dict["publish_time"]}天\n')
														
 
															-                        elif int(time.time()) - int(publish_time) > 3600 * 24 * rule_dict['publish_time']:
														
 
															-                            Common.logger(log_type, crawler).info(
														
 
															-                                f'发布时间:{publish_time_str}超过{rule_dict["publish_time"]}天\n')
														
 
															-                            cls.offset = 0
														
 
															-                            return
														
 
															-                        else:
														
 
															-                            video_url_dict = cls.get_video_url(log_type, crawler, gid)
														
 
															-                            video_url = video_url_dict["video_url"]
														
 
															-                            audio_url = video_url_dict["audio_url"]
														
 
															-                            video_width = video_url_dict["video_width"]
														
 
															-                            video_height = video_url_dict["video_height"]
														
 
															-
														
 
															-                            video_dict = {'video_title': video_title,
														
 
															-                                          'video_id': video_id,
														
 
															-                                          'gid': gid,
														
 
															-                                          'play_cnt': play_cnt,
														
 
															-                                          'comment_cnt': comment_cnt,
														
 
															-                                          'like_cnt': like_cnt,
														
 
															-                                          'share_cnt': share_cnt,
														
 
															-                                          'video_width': video_width,
														
 
															-                                          'video_height': video_height,
														
 
															-                                          'duration': video_duration,
														
 
															-                                          'publish_time_stamp': publish_time,
														
 
															-                                          'publish_time_str': publish_time_str,
														
 
															-                                          'is_top': is_top,
														
 
															-                                          'user_name': user_name,
														
 
															-                                          'user_id': user_id,
														
 
															-                                          'avatar_url': avatar_url,
														
 
															-                                          'cover_url': cover_url,
														
 
															-                                          'audio_url': audio_url,
														
 
															-                                          'video_url': video_url,
														
 
															-                                          'session': signature}
														
 
															-                            for k, v in video_dict.items():
														
 
															-                                Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-                            cls.download_publish(log_type=log_type,
														
 
															-                                                 crawler=crawler,
														
 
															-                                                 video_dict=video_dict,
														
 
															-                                                 rule_dict=rule_dict,
														
 
															-                                                 strategy=strategy,
														
 
															-                                                 our_uid=our_uid,
														
 
															-                                                 oss_endpoint=oss_endpoint,
														
 
															-                                                 env=env,
														
 
															-                                                 machine=machine)
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_videolist:{e}\n")
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_video(cls, log_type, crawler, video_id, env, machine):
														
 
															-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    # 下载 / 上传
														
 
															-    @classmethod
														
 
															-    def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
														
 
															-        filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
														
 
															-        for filter_word in filter_words:
														
 
															-            if filter_word in video_dict['video_title']:
														
 
															-                Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
														
 
															-                return
														
 
															-        if cls.download_rule(video_dict, rule_dict) is False:
														
 
															-            Common.logger(log_type, crawler).info('不满足抓取规则\n')
														
 
															-        elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
														
 
															-            Common.logger(log_type, crawler).info('视频已下载\n')
														
 
															-        else:
														
 
															-            # 下载视频
														
 
															-            Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
														
 
															-                                   title=video_dict['video_title'], url=video_dict['video_url'])
														
 
															-            # 下载音频
														
 
															-            Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
														
 
															-                                   title=video_dict['video_title'], url=video_dict['audio_url'])
														
 
															-            # 合成音视频
														
 
															-            Common.video_compose(log_type=log_type, crawler=crawler,
														
 
															-                                 video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
														
 
															-            try:
														
 
															-                if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
														
 
															-                    # 删除视频文件夹
														
 
															-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-                    Common.logger(log_type, crawler).info("视频size=0，删除成功\n")
														
 
															-                    return
														
 
															-            except FileNotFoundError:
														
 
															-                # 删除视频文件夹
														
 
															-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-                Common.logger(log_type, crawler).info("视频文件不存在，删除文件夹成功\n")
														
 
															-                return
														
 
															-            # 下载封面
														
 
															-            Common.download_method(log_type=log_type, crawler=crawler, text='cover',
														
 
															-                                   title=video_dict['video_title'], url=video_dict['cover_url'])
														
 
															-            # 保存视频信息至txt
														
 
															-            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
														
 
															-
														
 
															-            # 上传视频
														
 
															-            Common.logger(log_type, crawler).info("开始上传视频...")
														
 
															-            our_video_id = Publish.upload_and_publish(log_type=log_type,
														
 
															-                                                      crawler=crawler,
														
 
															-                                                      strategy=strategy,
														
 
															-                                                      our_uid=our_uid,
														
 
															-                                                      env=env,
														
 
															-                                                      oss_endpoint=oss_endpoint)
														
 
															-            if env == 'dev':
														
 
															-                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-            else:
														
 
															-                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-            Common.logger(log_type, crawler).info("视频上传完成")
														
 
															-
														
 
															-            if our_video_id is None:
														
 
															-                # 删除视频文件夹
														
 
															-                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-                return
														
 
															-
														
 
															-            # 视频写入飞书
														
 
															-            Feishu.insert_columns(log_type, 'xigua', "e075e9", "ROWS", 1, 2)
														
 
															-            upload_time = int(time.time())
														
 
															-            values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
														
 
															-                       "定向榜",
														
 
															-                       video_dict['video_title'],
														
 
															-                       str(video_dict['video_id']),
														
 
															-                       our_video_link,
														
 
															-                       video_dict['gid'],
														
 
															-                       video_dict['play_cnt'],
														
 
															-                       video_dict['comment_cnt'],
														
 
															-                       video_dict['like_cnt'],
														
 
															-                       video_dict['share_cnt'],
														
 
															-                       video_dict['duration'],
														
 
															-                       str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
														
 
															-                       video_dict['publish_time_str'],
														
 
															-                       video_dict['user_name'],
														
 
															-                       video_dict['user_id'],
														
 
															-                       video_dict['avatar_url'],
														
 
															-                       video_dict['cover_url'],
														
 
															-                       video_dict['video_url'],
														
 
															-                       video_dict['audio_url']]]
														
 
															-            time.sleep(1)
														
 
															-            Feishu.update_values(log_type, 'xigua', "e075e9", "F2:Z2", values)
														
 
															-            Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
														
 
															-
														
 
															-            # 视频信息保存数据库
														
 
															-            insert_sql = f""" insert into crawler_video(video_id,
														
 
															-                            user_id,
														
 
															-                            out_user_id,
														
 
															-                            platform,
														
 
															-                            strategy,
														
 
															-                            out_video_id,
														
 
															-                            video_title,
														
 
															-                            cover_url,
														
 
															-                            video_url,
														
 
															-                            duration,
														
 
															-                            publish_time,
														
 
															-                            play_cnt,
														
 
															-                            crawler_rule,
														
 
															-                            width,
														
 
															-                            height)
														
 
															-                            values({our_video_id},
														
 
															-                            {our_uid},
														
 
															-                            "{video_dict['user_id']}",
														
 
															-                            "{cls.platform}",
														
 
															-                            "定向爬虫策略",
														
 
															-                            "{video_dict['video_id']}",
														
 
															-                            "{video_dict['video_title']}",
														
 
															-                            "{video_dict['cover_url']}",
														
 
															-                            "{video_dict['video_url']}",
														
 
															-                            {int(video_dict['duration'])},
														
 
															-                            "{video_dict['publish_time_str']}",
														
 
															-                            {int(video_dict['play_cnt'])},
														
 
															-                            '{json.dumps(rule_dict)}',
														
 
															-                            {int(video_dict['video_width'])},
														
 
															-                            {int(video_dict['video_height'])}) """
														
 
															-            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-            MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
														
 
															-            Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
														
 
															-        user_list = get_user_from_mysql(log_type, crawler, crawler, env)
														
 
															-        for user in user_list:
														
 
															-            try:
														
 
															-                spider_link = user["link"]
														
 
															-                out_uid = spider_link.split('/')[-1]
														
 
															-                user_name = user["nick_name"]
														
 
															-                our_uid = user["uid"]
														
 
															-
														
 
															-                Common.logger(log_type, crawler).info(f"开始抓取 {user_name} 用户主页视频\n")
														
 
															-                cls.get_videolist(log_type=log_type,
														
 
															-                                  crawler=crawler,
														
 
															-                                  strategy=strategy,
														
 
															-                                  our_uid=our_uid,
														
 
															-                                  out_uid=out_uid,
														
 
															-                                  oss_endpoint=oss_endpoint,
														
 
															-                                  env=env,
														
 
															-                                  machine=machine)
														
 
															-                cls.offset = 0
														
 
															-            except Exception as e:
														
 
															-                continue
														
 
															-
														
 
															-
														
 
															-if __name__ == '__main__':
														
 
															-    Follow.get_follow_videos('follow', 'xigua', '定向抓取策略', 'inner', 'prod', 'aliyun')
														
--- a/xigua/xigua_follow/xigua_follow_scheduling.py
+++ b/xigua/xigua_follow/xigua_follow_scheduling.py
@@ -1,895 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/17
														
 
															-import base64
														
 
															-import json
														
 
															-import os
														
 
															-import random
														
 
															-import shutil
														
 
															-import string
														
 
															-import sys
														
 
															-import time
														
 
															-from datetime import date, timedelta
														
 
															-from hashlib import md5
														
 
															-
														
 
															-import requests
														
 
															-import urllib3
														
 
															-from requests.adapters import HTTPAdapter
														
 
															-
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-from common.publish import Publish
														
 
															-from common.public import get_user_from_mysql, get_config_from_mysql, download_rule
														
 
															-
														
 
															-
														
 
															-class ScheduleXiguaFollow:
														
 
															-    # 个人主页视频翻页参数
														
 
															-    offset = 0
														
 
															-    platform = "西瓜视频"
														
 
															-
														
 
															-    @classmethod
														
 
															-    def download_rule(cls, video_info_dict, rule_dict):
														
 
															-        if video_info_dict['play_cnt'] >= rule_dict['play_cnt']['min']:
														
 
															-            if video_info_dict['comment_cnt'] >= rule_dict['comment_cnt']['min']:
														
 
															-                if video_info_dict['like_cnt'] >= rule_dict['like_cnt']['min']:
														
 
															-                    if video_info_dict['duration'] >= rule_dict['duration']['min']:
														
 
															-                        if video_info_dict['video_width'] >= rule_dict['width']['min'] \
														
 
															-                                or video_info_dict['video_height'] >= rule_dict['height']['min']:
														
 
															-                            return True
														
 
															-                        else:
														
 
															-                            return False
														
 
															-                    else:
														
 
															-                        return False
														
 
															-                else:
														
 
															-                    return False
														
 
															-            else:
														
 
															-                return False
														
 
															-        else:
														
 
															-            return False
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_users(cls, log_type, crawler, task, env):
														
 
															-        link_list = task['spider_link']
														
 
															-        user_list = []
														
 
															-        for link in link_list:
														
 
															-            out_uid = int(link.split("https://www.ixigua.com/home/")[-1].replace("/", "").strip())
														
 
															-            sql = f""" select * from crawler_author_map where spider_link="{link}" """
														
 
															-            our_user_info = MysqlHelper.get_values(log_type=log_type, crawler=crawler, sql=sql, env=env)
														
 
															-            if len(our_user_info) == 0:
														
 
															-                our_uid = 0
														
 
															-                Common.logger(log_type, crawler).info(f"没有站内虚拟账号: {link}\n")
														
 
															-            else:
														
 
															-                # print(type(our_user_info[0]))
														
 
															-                # print(our_user_info[0])
														
 
															-                our_uid = our_user_info[0]["media_id"]
														
 
															-            user_dict = {
														
 
															-                "out_uid": out_uid,
														
 
															-                "our_uid": our_uid
														
 
															-            }
														
 
															-            user_list.append(user_dict)
														
 
															-        Common.logger(log_type, crawler).info(f"user_list:{user_list}")
														
 
															-        return user_list
														
 
															-
														
 
															-    # 过滤词库
														
 
															-    @classmethod
														
 
															-    def filter_words(cls, log_type, crawler):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'KGB4Hc')
														
 
															-                if filter_words_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
														
 
															-                    continue
														
 
															-                filter_words_list = []
														
 
															-                for x in filter_words_sheet:
														
 
															-                    for y in x:
														
 
															-                        if y is None:
														
 
															-                            pass
														
 
															-                        else:
														
 
															-                            filter_words_list.append(y)
														
 
															-                return filter_words_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def random_signature(cls):
														
 
															-        src_digits = string.digits  # string_数字
														
 
															-        src_uppercase = string.ascii_uppercase  # string_大写字母
														
 
															-        src_lowercase = string.ascii_lowercase  # string_小写字母
														
 
															-        digits_num = random.randint(1, 6)
														
 
															-        uppercase_num = random.randint(1, 26 - digits_num - 1)
														
 
															-        lowercase_num = 26 - (digits_num + uppercase_num)
														
 
															-        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + random.sample(
														
 
															-            src_lowercase, lowercase_num)
														
 
															-        random.shuffle(password)
														
 
															-        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
														
 
															-        new_password_start = new_password[0:18]
														
 
															-        new_password_end = new_password[-7:]
														
 
															-        if new_password[18] == '8':
														
 
															-            new_password = new_password_start + 'w' + new_password_end
														
 
															-        elif new_password[18] == '9':
														
 
															-            new_password = new_password_start + 'x' + new_password_end
														
 
															-        elif new_password[18] == '-':
														
 
															-            new_password = new_password_start + 'y' + new_password_end
														
 
															-        elif new_password[18] == '.':
														
 
															-            new_password = new_password_start + 'z' + new_password_end
														
 
															-        else:
														
 
															-            new_password = new_password_start + 'y' + new_password_end
														
 
															-        return new_password
														
 
															-
														
 
															-    # 获取视频详情
														
 
															-    @classmethod
														
 
															-    def get_video_url(cls, log_type, crawler, gid):
														
 
															-        try:
														
 
															-            url = 'https://www.ixigua.com/api/mixVideo/information?'
														
 
															-            headers = {
														
 
															-                "accept-encoding": "gzip, deflate",
														
 
															-                "accept-language": "zh-CN,zh-Hans;q=0.9",
														
 
															-                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
														
 
															-                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
														
 
															-                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
														
 
															-            }
														
 
															-            params = {
														
 
															-                'mixId': gid,
														
 
															-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
														
 
															-                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
														
 
															-                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
														
 
															-                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
														
 
															-                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
														
 
															-            }
														
 
															-            cookies = {
														
 
															-                'ixigua-a-s': '1',
														
 
															-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
														
 
															-                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
														
 
															-                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
														
 
															-                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
														
 
															-                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
														
 
															-                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
														
 
															-                '__ac_nonce': '06304878000964fdad287',
														
 
															-                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
														
 
															-                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
														
 
															-                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
														
 
															-                '_tea_utm_cache_1300': 'undefined',
														
 
															-                'support_avif': 'false',
														
 
															-                'support_webp': 'false',
														
 
															-                'xiguavideopcwebid': '7134967546256016900',
														
 
															-                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
														
 
															-            }
														
 
															-            urllib3.disable_warnings()
														
 
															-            s = requests.session()
														
 
															-            # max_retries=3 重试3次
														
 
															-            s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-            s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-            response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False,
														
 
															-                             proxies=Common.tunnel_proxies(), timeout=5)
														
 
															-            response.close()
														
 
															-            if 'data' not in response.json() or response.json()['data'] == '':
														
 
															-                Common.logger(log_type, crawler).warning('get_video_info: response: {}', response)
														
 
															-            else:
														
 
															-                video_info = response.json()['data']['gidInformation']['packerData']['video']
														
 
															-                video_url_dict = {}
														
 
															-                # video_url
														
 
															-                if 'videoResource' not in video_info:
														
 
															-                    video_url_dict["video_url"] = ''
														
 
															-                    video_url_dict["audio_url"] = ''
														
 
															-                    video_url_dict["video_width"] = 0
														
 
															-                    video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'dash_120fps' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['dash_120fps'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
														
 
															-                            and len(
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                                'backup_url_1']
														
 
															-                        audio_url = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                                'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                                'vwidth']
														
 
															-                        video_height = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                                'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'dash' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['dash'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_4']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_3']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_2']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_1']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['dash'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['dash']['dynamic_video'] \
														
 
															-                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'normal' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['normal'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_4']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_3']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_2']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_1']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['normal'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['normal']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['normal']['dynamic_video'] \
														
 
															-                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                else:
														
 
															-                    video_url_dict["video_url"] = ''
														
 
															-                    video_url_dict["audio_url"] = ''
														
 
															-                    video_url_dict["video_width"] = 0
														
 
															-                    video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                return video_url_dict
														
 
															-
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'get_video_url:{e}\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_videolist(cls, log_type, crawler, strategy, task, our_uid, out_uid, oss_endpoint, env):
														
 
															-        try:
														
 
															-            signature = cls.random_signature()
														
 
															-            while True:
														
 
															-                url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
														
 
															-                params = {
														
 
															-                    'to_user_id': str(out_uid),
														
 
															-                    'offset': str(cls.offset),
														
 
															-                    'limit': '30',
														
 
															-                    'maxBehotTime': '0',
														
 
															-                    'order': 'new',
														
 
															-                    'isHome': '0',
														
 
															-                    # 'msToken': 'G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==',
														
 
															-                    # 'X-Bogus': 'DFSzswVuEkUANjW9ShFTgR/F6qHt',
														
 
															-                    '_signature': signature,
														
 
															-                }
														
 
															-                headers = {
														
 
															-                    'referer': f'https://www.ixigua.com/home/{out_uid}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
														
 
															-                    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
														
 
															-                }
														
 
															-                urllib3.disable_warnings()
														
 
															-                s = requests.session()
														
 
															-                # max_retries=3 重试3次
														
 
															-                s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-                s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-                response = s.get(url=url, headers=headers, params=params, proxies=Common.tunnel_proxies(), verify=False,
														
 
															-                                 timeout=5)
														
 
															-                response.close()
														
 
															-                cls.offset += 30
														
 
															-                if response.status_code != 200:
														
 
															-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
														
 
															-                    cls.offset = 0
														
 
															-                    return
														
 
															-                elif 'data' not in response.text:
														
 
															-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
														
 
															-                    cls.offset = 0
														
 
															-                    return
														
 
															-                elif not response.json()["data"]['videoList']:
														
 
															-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
														
 
															-                    cls.offset = 0
														
 
															-                    return
														
 
															-                else:
														
 
															-                    videoList = response.json()['data']['videoList']
														
 
															-                    for i in range(len(videoList)):
														
 
															-                        # video_title
														
 
															-                        if 'title' not in videoList[i]:
														
 
															-                            video_title = 0
														
 
															-                        else:
														
 
															-                            video_title = videoList[i]['title'].strip().replace('手游', '') \
														
 
															-                                .replace('/', '').replace('\/', '').replace('\n', '').replace('"', '').replace("'", '')
														
 
															-
														
 
															-                        # video_id
														
 
															-                        if 'video_id' not in videoList[i]:
														
 
															-                            video_id = 0
														
 
															-                        else:
														
 
															-                            video_id = videoList[i]['video_id']
														
 
															-
														
 
															-                        # gid
														
 
															-                        if 'gid' not in videoList[i]:
														
 
															-                            gid = 0
														
 
															-                        else:
														
 
															-                            gid = videoList[i]['gid']
														
 
															-
														
 
															-                        # play_cnt
														
 
															-                        if 'video_detail_info' not in videoList[i]:
														
 
															-                            play_cnt = 0
														
 
															-                        elif 'video_watch_count' not in videoList[i]['video_detail_info']:
														
 
															-                            play_cnt = 0
														
 
															-                        else:
														
 
															-                            play_cnt = videoList[i]['video_detail_info']['video_watch_count']
														
 
															-
														
 
															-                        # comment_cnt
														
 
															-                        if 'comment_count' not in videoList[i]:
														
 
															-                            comment_cnt = 0
														
 
															-                        else:
														
 
															-                            comment_cnt = videoList[i]['comment_count']
														
 
															-
														
 
															-                        # like_cnt
														
 
															-                        if 'digg_count' not in videoList[i]:
														
 
															-                            like_cnt = 0
														
 
															-                        else:
														
 
															-                            like_cnt = videoList[i]['digg_count']
														
 
															-
														
 
															-                        # share_cnt
														
 
															-                        share_cnt = 0
														
 
															-
														
 
															-                        # video_duration
														
 
															-                        if 'video_duration' not in videoList[i]:
														
 
															-                            video_duration = 0
														
 
															-                        else:
														
 
															-                            video_duration = int(videoList[i]['video_duration'])
														
 
															-
														
 
															-                        # send_time
														
 
															-                        if 'publish_time' not in videoList[i]:
														
 
															-                            publish_time = 0
														
 
															-                        else:
														
 
															-                            publish_time = videoList[i]['publish_time']
														
 
															-
														
 
															-                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time))
														
 
															-
														
 
															-                        # is_top
														
 
															-                        if 'is_top' not in videoList[i]:
														
 
															-                            is_top = 0
														
 
															-                        else:
														
 
															-                            is_top = videoList[i]['is_top']
														
 
															-
														
 
															-                        # user_name
														
 
															-                        if 'user_info' not in videoList[i]:
														
 
															-                            user_name = 0
														
 
															-                        elif 'name' not in videoList[i]['user_info']:
														
 
															-                            user_name = 0
														
 
															-                        else:
														
 
															-                            user_name = videoList[i]['user_info']['name']
														
 
															-
														
 
															-                        # user_id
														
 
															-                        if 'user_info' not in videoList[i]:
														
 
															-                            user_id = 0
														
 
															-                        elif 'user_id' not in videoList[i]['user_info']:
														
 
															-                            user_id = 0
														
 
															-                        else:
														
 
															-                            user_id = videoList[i]['user_info']['user_id']
														
 
															-
														
 
															-                        # avatar_url
														
 
															-                        if 'user_info' not in videoList[i]:
														
 
															-                            avatar_url = 0
														
 
															-                        elif 'avatar_url' not in videoList[i]['user_info']:
														
 
															-                            avatar_url = 0
														
 
															-                        else:
														
 
															-                            avatar_url = videoList[i]['user_info']['avatar_url']
														
 
															-
														
 
															-                        # cover_url
														
 
															-                        if 'video_detail_info' not in videoList[i]:
														
 
															-                            cover_url = 0
														
 
															-                        elif 'detail_video_large_image' not in videoList[i]['video_detail_info']:
														
 
															-                            cover_url = 0
														
 
															-                        elif 'url' in videoList[i]['video_detail_info']['detail_video_large_image']:
														
 
															-                            cover_url = videoList[i]['video_detail_info']['detail_video_large_image']['url']
														
 
															-                        else:
														
 
															-                            cover_url = videoList[i]['video_detail_info']['detail_video_large_image']['url_list'][0][
														
 
															-                                'url']
														
 
															-                        video_url_dict = cls.get_video_url(log_type, crawler, gid)
														
 
															-                        video_url = video_url_dict["video_url"]
														
 
															-                        audio_url = video_url_dict["audio_url"]
														
 
															-                        video_width = video_url_dict["video_width"]
														
 
															-                        video_height = video_url_dict["video_height"]
														
 
															-
														
 
															-                        video_dict = {'video_title': video_title,
														
 
															-                                      'video_id': video_id,
														
 
															-                                      'gid': gid,
														
 
															-                                      'play_cnt': play_cnt,
														
 
															-                                      'comment_cnt': comment_cnt,
														
 
															-                                      'like_cnt': like_cnt,
														
 
															-                                      'share_cnt': share_cnt,
														
 
															-                                      'video_width': video_width,
														
 
															-                                      'video_height': video_height,
														
 
															-                                      'duration': video_duration,
														
 
															-                                      'publish_time_stamp': publish_time,
														
 
															-                                      'publish_time_str': publish_time_str,
														
 
															-                                      'is_top': is_top,
														
 
															-                                      'user_name': user_name,
														
 
															-                                      'user_id': user_id,
														
 
															-                                      'avatar_url': avatar_url,
														
 
															-                                      'cover_url': cover_url,
														
 
															-                                      'audio_url': audio_url,
														
 
															-                                      'video_url': video_url,
														
 
															-                                      'session': signature}
														
 
															-                        for k, v in video_dict.items():
														
 
															-                            Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-                        cls.download_publish(log_type=log_type,
														
 
															-                                             crawler=crawler,
														
 
															-                                             strategy=strategy,
														
 
															-                                             video_dict=video_dict,
														
 
															-                                             task=task,
														
 
															-                                             our_uid=our_uid,
														
 
															-                                             oss_endpoint=oss_endpoint,
														
 
															-                                             env=env)
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_videolist:{e}\n")
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_video(cls, log_type, crawler, video_id, env):
														
 
															-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    # 下载 / 上传
														
 
															-    @classmethod
														
 
															-    def download_publish(cls, log_type, crawler, strategy, video_dict, task, our_uid, oss_endpoint, env):
														
 
															-        try:
														
 
															-            filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
														
 
															-            for filter_word in filter_words:
														
 
															-                if filter_word in video_dict['video_title']:
														
 
															-                    Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
														
 
															-                    return
														
 
															-            if download_rule(log_type, crawler, video_dict, task['rule_dict']) is False:
														
 
															-                Common.logger(log_type, crawler).info('不满足抓取规则\n')
														
 
															-
														
 
															-            elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
														
 
															-                Common.logger(log_type, crawler).info('视频已下载\n')
														
 
															-            else:
														
 
															-                # 下载视频
														
 
															-                Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
														
 
															-                                       title=video_dict['video_title'], url=video_dict['video_url'])
														
 
															-                # 下载音频
														
 
															-                Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
														
 
															-                                       title=video_dict['video_title'], url=video_dict['audio_url'])
														
 
															-                # 合成音视频
														
 
															-                Common.video_compose(log_type=log_type, crawler=crawler,
														
 
															-                                     video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
														
 
															-                if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
														
 
															-                    # 删除视频文件夹
														
 
															-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-                    Common.logger(log_type, crawler).info("视频size=0，删除成功\n")
														
 
															-                    return
														
 
															-                # ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
														
 
															-                # if ffmpeg_dict is None or ffmpeg_dict['size'] == 0:
														
 
															-                #     Common.logger(log_type, crawler).warning(f"下载的视频无效，已删除\n")
														
 
															-                #     # 删除视频文件夹
														
 
															-                #     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-                #     return
														
 
															-                # 下载封面
														
 
															-                Common.download_method(log_type=log_type, crawler=crawler, text='cover',
														
 
															-                                       title=video_dict['video_title'], url=video_dict['cover_url'])
														
 
															-                # 保存视频信息至txt
														
 
															-                Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
														
 
															-
														
 
															-                # 上传视频
														
 
															-                Common.logger(log_type, crawler).info("开始上传视频...")
														
 
															-                our_video_id = Publish.upload_and_publish(log_type=log_type,
														
 
															-                                                          crawler=crawler,
														
 
															-                                                          strategy=strategy,
														
 
															-                                                          our_uid=our_uid,
														
 
															-                                                          env=env,
														
 
															-                                                          oss_endpoint=oss_endpoint)
														
 
															-                if env == 'dev':
														
 
															-                    our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-                else:
														
 
															-                    our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-                Common.logger(log_type, crawler).info("视频上传完成")
														
 
															-
														
 
															-                if our_video_id is None:
														
 
															-                    # 删除视频文件夹
														
 
															-                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-                    return
														
 
															-
														
 
															-                # 视频写入飞书
														
 
															-                Feishu.insert_columns(log_type, 'xigua', "e075e9", "ROWS", 1, 2)
														
 
															-                upload_time = int(time.time())
														
 
															-                values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
														
 
															-                           "定向榜",
														
 
															-                           video_dict['video_title'],
														
 
															-                           str(video_dict['video_id']),
														
 
															-                           our_video_link,
														
 
															-                           video_dict['gid'],
														
 
															-                           video_dict['play_cnt'],
														
 
															-                           video_dict['comment_cnt'],
														
 
															-                           video_dict['like_cnt'],
														
 
															-                           video_dict['share_cnt'],
														
 
															-                           video_dict['duration'],
														
 
															-                           str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
														
 
															-                           video_dict['publish_time_str'],
														
 
															-                           video_dict['user_name'],
														
 
															-                           video_dict['user_id'],
														
 
															-                           video_dict['avatar_url'],
														
 
															-                           video_dict['cover_url'],
														
 
															-                           video_dict['video_url'],
														
 
															-                           video_dict['audio_url']]]
														
 
															-                time.sleep(1)
														
 
															-                Feishu.update_values(log_type, 'xigua', "e075e9", "F2:Z2", values)
														
 
															-                Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
														
 
															-
														
 
															-                rule_dict = {
														
 
															-                    "play_cnt": task["play_cnt"],
														
 
															-                    "video_width": task["video_width"],
														
 
															-                    "video_height": task["video_height"],
														
 
															-                    "video_like": task["video_like"],
														
 
															-                    "share_cnt": task["share_cnt"],
														
 
															-                    "duration": {"min": task["duration_min"], "max": task["duration_max"]}
														
 
															-                }
														
 
															-
														
 
															-                # 视频信息保存数据库
														
 
															-                insert_sql = f""" insert into crawler_video(video_id,
														
 
															-                                user_id,
														
 
															-                                out_user_id,
														
 
															-                                platform,
														
 
															-                                strategy,
														
 
															-                                out_video_id,
														
 
															-                                video_title,
														
 
															-                                cover_url,
														
 
															-                                video_url,
														
 
															-                                duration,
														
 
															-                                publish_time,
														
 
															-                                play_cnt,
														
 
															-                                crawler_rule,
														
 
															-                                width,
														
 
															-                                height)
														
 
															-                                values({our_video_id},
														
 
															-                                {our_uid},
														
 
															-                                "{video_dict['user_id']}",
														
 
															-                                "{cls.platform}",
														
 
															-                                "定向爬虫策略",
														
 
															-                                "{video_dict['video_id']}",
														
 
															-                                "{video_dict['video_title']}",
														
 
															-                                "{video_dict['cover_url']}",
														
 
															-                                "{video_dict['video_url']}",
														
 
															-                                {int(video_dict['duration'])},
														
 
															-                                "{video_dict['publish_time_str']}",
														
 
															-                                {int(video_dict['play_cnt'])},
														
 
															-                                '{json.dumps(rule_dict)}',
														
 
															-                                {int(video_dict['video_width'])},
														
 
															-                                {int(video_dict['video_height'])}) """
														
 
															-                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-                MysqlHelper.update_values(log_type, crawler, insert_sql, env)
														
 
															-                Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'download_publish异常:{e}\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_follow_videos(cls, log_type, crawler, task, oss_endpoint, env):
														
 
															-        user_list = get_user_from_mysql(log_type, crawler, crawler, env)
														
 
															-        strategy = '定向抓取策略'
														
 
															-        for user in user_list:
														
 
															-            try:
														
 
															-                spider_link = user["link"]
														
 
															-                out_uid = spider_link.split('/')[-1]
														
 
															-                user_name = user["nick_name"]
														
 
															-                our_uid = user["uid"]
														
 
															-                Common.logger(log_type, crawler).info(f"开始抓取 {user_name} 用户主页视频\n")
														
 
															-                cls.get_videolist(log_type=log_type,
														
 
															-                                  crawler=crawler,
														
 
															-                                  strategy=strategy,
														
 
															-                                  task=task,
														
 
															-                                  our_uid=our_uid,
														
 
															-                                  out_uid=out_uid,
														
 
															-                                  oss_endpoint=oss_endpoint,
														
 
															-                                  env=env)
														
 
															-                cls.offset = 0
														
 
															-            except Exception as e:
														
 
															-                Common.logger(log_type, crawler).error(f"get_follow_videos:{e}\n")
														
 
															-
														
 
															-
														
 
															-if __name__ == '__main__':
														
 
															-    # SchedulingFollow.get_users(log_type="follow",
														
 
															-    #                            crawler="xigua",
														
 
															-    #                            spider_rule="['https://www.ixigua.com/home/95420624045', 'https://www.ixigua.com/home/6431477489']",
														
 
															-    #                            env="dev",
														
 
															-    #                            machine="local")
														
 
															-
														
 
															-    print(ScheduleXiguaFollow.repeat_video("follow", "xigua", "v0201ag10000ce3jcjbc77u8jsplpgrg", "dev"))
														
 
															-    pass
														
--- a/xigua/xigua_main/run_xigua_author_scheduling.py
+++ b/xigua/xigua_main/run_xigua_author_scheduling.py
@@ -1,43 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/5/26
														
 
															-import argparse
														
 
															-import os
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.public import task_fun
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from xigua.xigua_author.xigua_author_scheduling import XiguaauthorScheduling
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, task, env):
														
 
															-    task_dict = task_fun(task)['task_dict']
														
 
															-    rule_dict = task_fun(task)['rule_dict']
														
 
															-    task_id = task_dict['task_id']
														
 
															-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
														
 
															-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
														
 
															-    Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
														
 
															-    # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
														
 
															-    Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["task_name"]}\n')
														
 
															-    XiguaauthorScheduling.get_author_videos(log_type=log_type,
														
 
															-                                            crawler=crawler,
														
 
															-                                            rule_dict=rule_dict,
														
 
															-                                            user_list=user_list,
														
 
															-                                            env=env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取完一轮\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', default='recommend')  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler', default='kuaishou')  ## 添加参数
														
 
															-    parser.add_argument('--task')  ## 添加参数
														
 
															-    parser.add_argument('--env', default='prod')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         task=args.task,
														
 
															-         env=args.env)
														
--- a/xigua/xigua_main/run_xigua_follow.py
+++ b/xigua/xigua_main/run_xigua_follow.py
@@ -1,41 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/17
														
 
															-import argparse
														
 
															-import os
														
 
															-import sys
														
 
															-import time
														
 
															-
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from xigua.xigua_follow.xigua_follow import Follow
														
 
															-from common.feishu import Feishu
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, strategy, oss_endpoint, env, machine):
														
 
															-    try:
														
 
															-        Common.logger(log_type, crawler).info('开始抓取 西瓜视频 定向榜\n')
														
 
															-        Follow.get_follow_videos(log_type, crawler, strategy, oss_endpoint, env, machine)
														
 
															-        Common.del_logs(log_type, crawler)
														
 
															-        Common.logger(log_type, crawler).info('抓取完一轮，休眠 1 分钟\n')
														
 
															-    except Exception as e:
														
 
															-        Common.logger(log_type, crawler).info(f"西瓜视频异常，触发报警:{e}\n")
														
 
															-        Feishu.bot(log_type, crawler, f"{e}")
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', default='follow', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler', default='xigua')  ## 添加参数
														
 
															-    parser.add_argument('--strategy', default='定向抓取')  ## 添加参数
														
 
															-    parser.add_argument('--oss_endpoint', default='inner')  ## 添加参数
														
 
															-    parser.add_argument('--env', default='prod')  ## 添加参数
														
 
															-    parser.add_argument('--machine', default='aliyun')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    # print(args)
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         strategy=args.strategy,
														
 
															-         oss_endpoint=args.oss_endpoint,
														
 
															-         env=args.env,
														
 
															-         machine=args.machine)
														
--- a/xigua/xigua_main/run_xigua_recommend.py
+++ b/xigua/xigua_main/run_xigua_recommend.py
@@ -1,30 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/4/11
														
 
															-import argparse
														
 
															-import os
														
 
															-import sys
														
 
															-
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from xigua.xigua_recommend.xigua_recommend import XiguaRecommend
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, env):
														
 
															-    if env == "dev":
														
 
															-        oss_endpoint = "out"
														
 
															-    else:
														
 
															-        oss_endpoint = "inner"
														
 
															-    Common.logger(log_type, crawler).info('开始抓取 西瓜视频 推荐榜\n')
														
 
															-    XiguaRecommend.get_videoList(log_type, crawler, oss_endpoint, env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取完一轮，休眠 1 分钟\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type, crawler=args.crawler, env=args.env)
														
--- a/xigua/xigua_main/run_xigua_recommend_scheduling.py
+++ b/xigua/xigua_main/run_xigua_recommend_scheduling.py
@@ -1,48 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/5/25
														
 
															-import argparse
														
 
															-import os
														
 
															-import random
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.public import task_fun
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from xigua.xigua_recommend.xigua_recommend_scheduling import XiguarecommendScheduling
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, task, env):
														
 
															-    task_dict = task_fun(task)['task_dict']
														
 
															-    rule_dict = task_fun(task)['rule_dict']
														
 
															-    task_id = task_dict['task_id']
														
 
															-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
														
 
															-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
														
 
															-    our_uid_list = []
														
 
															-    for user in user_list:
														
 
															-        our_uid_list.append(user["uid"])
														
 
															-    our_uid = random.choice(our_uid_list)
														
 
															-    Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
														
 
															-    # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
														
 
															-    Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["task_name"]}\n')
														
 
															-    XiguarecommendScheduling.get_videoList(log_type=log_type,
														
 
															-                                           crawler=crawler,
														
 
															-                                           rule_dict=rule_dict,
														
 
															-                                           our_uid=our_uid,
														
 
															-                                           env=env)
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取任务结束\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', default='recommend')  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler', default='kuaishou')  ## 添加参数
														
 
															-    parser.add_argument('--task')  ## 添加参数
														
 
															-    parser.add_argument('--env', default='prod')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         task=args.task,
														
 
															-         env=args.env)
														
--- a/xigua/xigua_main/run_xigua_search.py
+++ b/xigua/xigua_main/run_xigua_search.py
@@ -1,42 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: lierqiang
														
 
															-# @Time: 2023/3/13
														
 
															-import argparse
														
 
															-import os
														
 
															-import sys
														
 
															-import time
														
 
															-
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from xigua.xigua_search.xigua_search import XiguaSearch
														
 
															-from common.feishu import Feishu
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, strategy, oss_endpoint, env, machine):
														
 
															-    try:
														
 
															-        Common.logger(log_type, crawler).info('开始抓取 西瓜视频 搜索\n')
														
 
															-        XiguaSearch.get_search_videos(log_type, crawler, strategy, oss_endpoint, env, machine)
														
 
															-        Common.del_logs(log_type, crawler)
														
 
															-        Common.logger(log_type, crawler).info('抓取完一轮，休眠 1 分钟\n')
														
 
															-    except Exception as e:
														
 
															-        Common.logger(log_type, crawler).info(f"西瓜视频异常，触发报警:{e}\n")
														
 
															-        Feishu.bot(log_type, crawler, f"{e}")
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--strategy')  ## 添加参数
														
 
															-    parser.add_argument('--our_uid')  ## 添加参数
														
 
															-    parser.add_argument('--oss_endpoint')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    parser.add_argument('--machine')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    # print(args)
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         strategy=args.strategy,
														
 
															-         oss_endpoint=args.oss_endpoint,
														
 
															-         env=args.env,
														
 
															-         machine=args.machine)
														
--- a/xigua/xigua_main/run_xigua_search_new.py
+++ b/xigua/xigua_main/run_xigua_search_new.py
@@ -1,28 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/5/12
														
 
															-import argparse
														
 
															-import os
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from xigua.xigua_search.xigua_search_new import XiguaSearchNew
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, env):
														
 
															-    Common.logger(log_type, crawler).info('开始抓取 西瓜视频 搜索策略\n')
														
 
															-    XiguaSearchNew.get_search_videos(log_type, crawler, env)
														
 
															-    os.system("ps aux | grep Chrome | grep -v grep | awk '{print $2}' | xargs kill -9")
														
 
															-    os.system("ps aux | grep chromedriver | grep -v grep | awk '{print $2}' | xargs kill -9")
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取完一轮\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler')  ## 添加参数
														
 
															-    parser.add_argument('--env')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    # print(args)
														
 
															-    main(log_type=args.log_type, crawler=args.crawler, env=args.env)
														
--- a/xigua/xigua_main/run_xigua_search_scheduling.py
+++ b/xigua/xigua_main/run_xigua_search_scheduling.py
@@ -1,45 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/5/26
														
 
															-import argparse
														
 
															-import os
														
 
															-import sys
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.common import Common
														
 
															-from common.public import task_fun
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from xigua.xigua_search.xigua_search_scheduling import XiguasearchScheduling
														
 
															-
														
 
															-
														
 
															-def main(log_type, crawler, task, env):
														
 
															-    task_dict = task_fun(task)['task_dict']
														
 
															-    rule_dict = task_fun(task)['rule_dict']
														
 
															-    task_id = task_dict['task_id']
														
 
															-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
														
 
															-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
														
 
															-    Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
														
 
															-    Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
														
 
															-    # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
														
 
															-    Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["task_name"]}\n')
														
 
															-    XiguasearchScheduling.get_search_videos(log_type=log_type,
														
 
															-                                            crawler=crawler,
														
 
															-                                            rule_dict=rule_dict,
														
 
															-                                            user_list=user_list,
														
 
															-                                            env=env)
														
 
															-    os.system("ps aux | grep Chrome | grep -v grep | awk '{print $2}' | xargs kill -9")
														
 
															-    os.system("ps aux | grep chromedriver | grep -v grep | awk '{print $2}' | xargs kill -9")
														
 
															-    Common.del_logs(log_type, crawler)
														
 
															-    Common.logger(log_type, crawler).info('抓取完一轮\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
														
 
															-    parser.add_argument('--log_type', default='recommend')  ## 添加参数,注明参数类型
														
 
															-    parser.add_argument('--crawler', default='kuaishou')  ## 添加参数
														
 
															-    parser.add_argument('--task')  ## 添加参数
														
 
															-    parser.add_argument('--env', default='prod')  ## 添加参数
														
 
															-    args = parser.parse_args()  ### 参数赋值，也可以通过终端赋值
														
 
															-    main(log_type=args.log_type,
														
 
															-         crawler=args.crawler,
														
 
															-         task=args.task,
														
 
															-         env=args.env)
														
--- a/xigua/xigua_recommend/xigua_recommend.py
+++ b/xigua/xigua_recommend/xigua_recommend.py
@@ -1,850 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/4/7
														
 
															-import base64
														
 
															-import json
														
 
															-import os
														
 
															-import random
														
 
															-import shutil
														
 
															-import string
														
 
															-import sys
														
 
															-import time
														
 
															-from datetime import date, timedelta
														
 
															-from hashlib import md5
														
 
															-
														
 
															-import requests
														
 
															-import urllib3
														
 
															-from requests.adapters import HTTPAdapter
														
 
															-from selenium import webdriver
														
 
															-from selenium.webdriver import DesiredCapabilities
														
 
															-from selenium.webdriver.chrome.service import Service
														
 
															-
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.publish import Publish
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-
														
 
															-
														
 
															-class XiguaRecommend:
														
 
															-    platform = "西瓜视频"
														
 
															-
														
 
															-    @classmethod
														
 
															-    def xigua_config(cls, log_type, crawler, text, env):
														
 
															-        select_sql = f"""select * from crawler_config where source="xigua" """
														
 
															-        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
														
 
															-        title_list = []
														
 
															-        filter_list = []
														
 
															-        for content in contents:
														
 
															-            config = content['config']
														
 
															-            config_dict = eval(config)
														
 
															-            for k, v in config_dict.items():
														
 
															-                if k == "title":
														
 
															-                    title_list_config = v.split(",")
														
 
															-                    for title in title_list_config:
														
 
															-                        title_list.append(title)
														
 
															-                if k == "filter":
														
 
															-                    filter_list_config = v.split(",")
														
 
															-                    for filter_word in filter_list_config:
														
 
															-                        filter_list.append(filter_word)
														
 
															-        if text == "title":
														
 
															-            return title_list
														
 
															-        elif text == "filter":
														
 
															-            return filter_list
														
 
															-
														
 
															-    @classmethod
														
 
															-    def download_rule(cls, video_dict):
														
 
															-        publish_time_str_rule = (date.today() + timedelta(days=-30)).strftime("%Y-%m-%d %H:%M:%S")
														
 
															-        publish_time_stamp_rule = int(time.mktime(time.strptime(publish_time_str_rule, "%Y-%m-%d %H:%M:%S")))
														
 
															-        if int(video_dict['play_cnt']) >= 10000:
														
 
															-            if 60*30 >= int(video_dict['duration']) >= 60:
														
 
															-                if int(video_dict['publish_time_stamp']) >= publish_time_stamp_rule:
														
 
															-                    return True
														
 
															-                else:
														
 
															-                    return False
														
 
															-            else:
														
 
															-                return False
														
 
															-        else:
														
 
															-            return False
														
 
															-
														
 
															-    @classmethod
														
 
															-    def random_signature(cls):
														
 
															-        src_digits = string.digits  # string_数字
														
 
															-        src_uppercase = string.ascii_uppercase  # string_大写字母
														
 
															-        src_lowercase = string.ascii_lowercase  # string_小写字母
														
 
															-        digits_num = random.randint(1, 6)
														
 
															-        uppercase_num = random.randint(1, 26 - digits_num - 1)
														
 
															-        lowercase_num = 26 - (digits_num + uppercase_num)
														
 
															-        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + random.sample(
														
 
															-            src_lowercase, lowercase_num)
														
 
															-        random.shuffle(password)
														
 
															-        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
														
 
															-        new_password_start = new_password[0:18]
														
 
															-        new_password_end = new_password[-7:]
														
 
															-        if new_password[18] == '8':
														
 
															-            new_password = new_password_start + 'w' + new_password_end
														
 
															-        elif new_password[18] == '9':
														
 
															-            new_password = new_password_start + 'x' + new_password_end
														
 
															-        elif new_password[18] == '-':
														
 
															-            new_password = new_password_start + 'y' + new_password_end
														
 
															-        elif new_password[18] == '.':
														
 
															-            new_password = new_password_start + 'z' + new_password_end
														
 
															-        else:
														
 
															-            new_password = new_password_start + 'y' + new_password_end
														
 
															-        return new_password
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_signature(cls, log_type, crawler, env):
														
 
															-        try:
														
 
															-            # 打印请求配置
														
 
															-            ca = DesiredCapabilities.CHROME
														
 
															-            ca["goog:loggingPrefs"] = {"performance": "ALL"}
														
 
															-
														
 
															-            # 不打开浏览器运行
														
 
															-            chrome_options = webdriver.ChromeOptions()
														
 
															-            chrome_options.add_argument("headless")
														
 
															-            chrome_options.add_argument(
														
 
															-                f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
														
 
															-            chrome_options.add_argument("--no-sandbox")
														
 
															-
														
 
															-            # driver初始化
														
 
															-            if env == "dev":
														
 
															-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
														
 
															-                                          service=Service('/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
														
 
															-            else:
														
 
															-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
														
 
															-
														
 
															-
														
 
															-            driver.implicitly_wait(10)
														
 
															-            driver.get('https://www.ixigua.com/')
														
 
															-            time.sleep(1)
														
 
															-
														
 
															-            # 向上滑动 1000 个像素
														
 
															-            driver.execute_script('window.scrollBy(0, 2000)')
														
 
															-            # Common.logger(log_type, crawler).info('刷新页面')
														
 
															-            driver.refresh()
														
 
															-            logs = driver.get_log("performance")
														
 
															-            # Common.logger(log_type, crawler).info('已获取logs:{}\n', logs)
														
 
															-            driver.quit()
														
 
															-            for line in logs:
														
 
															-                msg = json.loads(line['message'])
														
 
															-                if 'params' not in msg['message']:
														
 
															-                    pass
														
 
															-                elif 'documentURL' not in msg['message']['params']:
														
 
															-                    pass
														
 
															-                elif 'www.ixigua.com' not in msg['message']['params']['documentURL']:
														
 
															-                    pass
														
 
															-                elif 'url' not in msg['message']['params']['request']:
														
 
															-                    pass
														
 
															-                elif '_signature' not in msg['message']['params']['request']['url']:
														
 
															-                    pass
														
 
															-                else:
														
 
															-                    url = msg['message']['params']['request']['url']
														
 
															-                    signature = url.split('_signature=')[-1].split('&')[0]
														
 
															-                    return signature
														
 
															-
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'get_signature异常:{e}\n')
														
 
															-
														
 
															-    # 获取视频详情
														
 
															-    @classmethod
														
 
															-    def get_video_url(cls, log_type, crawler, gid):
														
 
															-        try:
														
 
															-            url = 'https://www.ixigua.com/api/mixVideo/information?'
														
 
															-            headers = {
														
 
															-                "accept-encoding": "gzip, deflate",
														
 
															-                "accept-language": "zh-CN,zh-Hans;q=0.9",
														
 
															-                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
														
 
															-                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
														
 
															-                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
														
 
															-            }
														
 
															-            params = {
														
 
															-                'mixId': gid,
														
 
															-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
														
 
															-                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
														
 
															-                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
														
 
															-                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
														
 
															-                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
														
 
															-            }
														
 
															-            cookies = {
														
 
															-                'ixigua-a-s': '1',
														
 
															-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
														
 
															-                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
														
 
															-                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
														
 
															-                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
														
 
															-                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
														
 
															-                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
														
 
															-                '__ac_nonce': '06304878000964fdad287',
														
 
															-                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
														
 
															-                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
														
 
															-                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
														
 
															-                '_tea_utm_cache_1300': 'undefined',
														
 
															-                'support_avif': 'false',
														
 
															-                'support_webp': 'false',
														
 
															-                'xiguavideopcwebid': '7134967546256016900',
														
 
															-                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
														
 
															-            }
														
 
															-            urllib3.disable_warnings()
														
 
															-            s = requests.session()
														
 
															-            # max_retries=3 重试3次
														
 
															-            s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-            s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-            response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False,
														
 
															-                             proxies=Common.tunnel_proxies(), timeout=5)
														
 
															-            response.close()
														
 
															-            if 'data' not in response.json() or response.json()['data'] == '':
														
 
															-                Common.logger(log_type, crawler).warning('get_video_info: response: {}', response)
														
 
															-            else:
														
 
															-                video_info = response.json()['data']['gidInformation']['packerData']['video']
														
 
															-                video_url_dict = {}
														
 
															-                # video_url
														
 
															-                if 'videoResource' not in video_info:
														
 
															-                    video_url_dict["video_url"] = ''
														
 
															-                    video_url_dict["audio_url"] = ''
														
 
															-                    video_url_dict["video_width"] = 0
														
 
															-                    video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'dash_120fps' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['dash_120fps'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_4'][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_4'][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_4'][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_3'][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_3'][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_3'][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_2'][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_2'][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_2'][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_1'][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_1'][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_1'][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
														
 
															-                            and len(
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = \
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = \
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = \
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vwidth']
														
 
															-                        video_height = \
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'dash' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['dash'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_4']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_3']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_2']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_1']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['dash'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['dash']['dynamic_video'] \
														
 
															-                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(
														
 
															-                        video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = \
														
 
															-                        video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1]['vwidth']
														
 
															-                        video_height = \
														
 
															-                        video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1]['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'normal' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['normal'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_4']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_3']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_2']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_1']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['normal'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['normal']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['normal']['dynamic_video'] \
														
 
															-                            and len(
														
 
															-                        video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(
														
 
															-                        video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = \
														
 
															-                        video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = \
														
 
															-                        video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = \
														
 
															-                        video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vwidth']
														
 
															-                        video_height = \
														
 
															-                        video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                else:
														
 
															-                    video_url_dict["video_url"] = ''
														
 
															-                    video_url_dict["audio_url"] = ''
														
 
															-                    video_url_dict["video_width"] = 0
														
 
															-                    video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                return video_url_dict
														
 
															-
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'get_video_url:{e}\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_video(cls, log_type, crawler, video_id, env):
														
 
															-        sql = f""" select * from crawler_video where platform="西瓜视频" and out_video_id="{video_id}"; """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_videoList(cls, log_type, crawler, oss_endpoint, env):
														
 
															-        queryCount = 1
														
 
															-        while True:
														
 
															-            signature = cls.get_signature(log_type, crawler, env)
														
 
															-            if signature is None:
														
 
															-                Common.logger(log_type, crawler).warning(f"signature:{signature}")
														
 
															-                continue
														
 
															-            url = "https://www.ixigua.com/api/feedv2/feedById?"
														
 
															-            params = {
														
 
															-                "channelId": "94349543909",
														
 
															-                "count": "9",
														
 
															-                "maxTime": str(int(time.time())),
														
 
															-                # "maxTime": "1683190690",
														
 
															-                "queryCount": str(queryCount),
														
 
															-                "_signature": signature,
														
 
															-                "request_from": "701",
														
 
															-                "offset": "0",
														
 
															-                "referrer:": "https://open.weixin.qq.com/",
														
 
															-                "aid": "1768",
														
 
															-                "msToken": "XDpSA6_ZPP-gAkkBV-_WRQvNpG20uUUGPwf3E-S-txhznjBcXNbK2sbOuSpF3U7Jki6R9HwLDPeW4Gj7n6PURPTKrKLEs8J-ieFrwXDvMp2DX94ZoMua",
														
 
															-                # "X-Bogus": "DFSzswVOx7bANt0TtCAcOFm4pIkR",
														
 
															-            }
														
 
															-            headers = {
														
 
															-                'referer': 'https://www.ixigua.com/',
														
 
															-                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
														
 
															-                'authority': 'www.ixigua.com',
														
 
															-                'accept': 'application/json, text/plain, */*',
														
 
															-                'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
														
 
															-                'cache-control': 'no-cache',
														
 
															-                # 'cookie': 'ttcid=5d8f917a525e46759dc886296bf1111b69; MONITOR_WEB_ID=ad1c8360-d4c9-4fa2-a801-d9fd68dfc1b2; s_v_web_id=verify_lh8vaa6v_VI4RQ0ET_nVbq_4PXw_8mfN_7Xp6wdLOZi08; passport_csrf_token=0e7c6992cb6170c9db034c3696191fff; passport_csrf_token_default=0e7c6992cb6170c9db034c3696191fff; odin_tt=b102690fef38bf07c400e3c69cdc27627701802bdd816fa827e3721c33607c4d2c0cbef09fe99c7d370e4a9e9e11c263; sid_guard=8dec4ecbe52cbdcff99dafe622b586b4%7C1683189144%7C3024002%7CThu%2C+08-Jun-2023+08%3A32%3A26+GMT; uid_tt=1dccbeaf685e24afd018fec335f3151d; uid_tt_ss=1dccbeaf685e24afd018fec335f3151d; sid_tt=8dec4ecbe52cbdcff99dafe622b586b4; sessionid=8dec4ecbe52cbdcff99dafe622b586b4; sessionid_ss=8dec4ecbe52cbdcff99dafe622b586b4; sid_ucp_v1=1.0.0-KGVhZTIxYjFlNzRlZTNhZjk5MjNlNzk2NGRhOWJlYzZiNGI5NzBhMzYKFQiu3d-eqQIQmNvNogYYGCAMOAhACxoCaGwiIDhkZWM0ZWNiZTUyY2JkY2ZmOTlkYWZlNjIyYjU4NmI0; ssid_ucp_v1=1.0.0-KGVhZTIxYjFlNzRlZTNhZjk5MjNlNzk2NGRhOWJlYzZiNGI5NzBhMzYKFQiu3d-eqQIQmNvNogYYGCAMOAhACxoCaGwiIDhkZWM0ZWNiZTUyY2JkY2ZmOTlkYWZlNjIyYjU4NmI0; support_webp=true; support_avif=true; csrf_session_id=9dd5d8287d4f075ae24ff163cd22e51f; msToken=XDpSA6_ZPP-gAkkBV-_WRQvNpG20uUUGPwf3E-S-txhznjBcXNbK2sbOuSpF3U7Jki6R9HwLDPeW4Gj7n6PURPTKrKLEs8J-ieFrwXDvMp2DX94ZoMua; ixigua-a-s=1; tt_scid=UTduWO4ij7cX6YKx23sDuV4zjvFkGFtFk5ZBhEnd1lJ1EZBykStzU7tbWQOSzGdE0fc6; ttwid=1%7C4zaTJmlaHpEa8rAB-KjREdxT3sNBUJWrAzRJnNvqExQ%7C1683198318%7Cffc2eef612caab19a0db93b4cec27e21a6230f9b82ab4bf5b1c6193d082baab1',
														
 
															-                'pragma': 'no-cache',
														
 
															-                'sec-ch-ua': '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"',
														
 
															-                'sec-ch-ua-mobile': '?0',
														
 
															-                'sec-ch-ua-platform': '"macOS"',
														
 
															-                'sec-fetch-dest': 'empty',
														
 
															-                'sec-fetch-mode': 'cors',
														
 
															-                'sec-fetch-site': 'same-origin',
														
 
															-                # 'tt-anti-token': '95Ny0vj4Q-90dd9b91193b34ce554cc2861439b9629d897723f4d33719b9747d7d18a2ff7c',
														
 
															-                # 'x-secsdk-csrf-token': '000100000001ecb8f07e247a89e289b3ab55f3c967a8e88f88aa0addb1ddca9d3e36f35d7999175be79b8699c881'
														
 
															-            }
														
 
															-            urllib3.disable_warnings()
														
 
															-            s = requests.session()
														
 
															-            # max_retries=3 重试3次
														
 
															-            s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-            s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-            response = requests.get(url=url, headers=headers, params=params, proxies=Common.tunnel_proxies(), verify=False, timeout=5)
														
 
															-            response.close()
														
 
															-            queryCount += 1
														
 
															-            Common.logger(log_type, crawler).info(f"queryCount:{queryCount}")
														
 
															-            if response.status_code != 200:
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
														
 
															-                return
														
 
															-            elif 'data' not in response.text:
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
														
 
															-                return
														
 
															-            elif 'channelFeed' not in response.json()['data']:
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
														
 
															-                return
														
 
															-            elif 'Data' not in response.json()['data']['channelFeed']:
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
														
 
															-                return
														
 
															-            elif len(response.json()['data']['channelFeed']['Data']) == 0:
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
														
 
															-                return
														
 
															-            else:
														
 
															-                videoList = response.json()['data']['channelFeed']['Data']
														
 
															-                for i in range(len(videoList)):
														
 
															-                    if 'data' not in videoList[i]:
														
 
															-                        continue
														
 
															-                    # video_title
														
 
															-                    video_title = videoList[i]['data'].get('title', '').replace('"' ,'').replace("'", '')
														
 
															-                    if video_title == '':
														
 
															-                        video_title = random.choice(cls.xigua_config(log_type, crawler, "title", env))
														
 
															-                    # video_id
														
 
															-                    video_id = videoList[i]['data'].get('vid', '')
														
 
															-                    # play_cnt
														
 
															-                    play_cnt = int(videoList[i]['data'].get('playNum', 0))
														
 
															-                    # comment_cnt
														
 
															-                    comment_cnt = int(videoList[i]['data'].get('commentNum', 0))
														
 
															-                    # gid
														
 
															-                    gid = videoList[i]['data'].get('item_id', 0)
														
 
															-                    # share_cnt / like_cnt
														
 
															-                    share_cnt = 0
														
 
															-                    like_cnt = 0
														
 
															-                    # duration
														
 
															-                    duration = int(videoList[i]['data'].get('duration', 0))
														
 
															-                    # publish_time_stamp
														
 
															-                    publish_time_stamp = int(videoList[i]['data'].get('publish_time', 0))
														
 
															-                    # publish_time_str
														
 
															-                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
														
 
															-                    # cover_url
														
 
															-                    cover_url = videoList[i]['data'].get('image_url', '')
														
 
															-                    # user_name
														
 
															-                    user_name = videoList[i]['data']['user_info'].get('name', '')
														
 
															-                    # user_id
														
 
															-                    user_id = videoList[i]['data']['user_info'].get('user_id', '')
														
 
															-                    # avatar_url
														
 
															-                    avatar_url = videoList[i]['data']['user_info'].get('avatar_url', '')
														
 
															-
														
 
															-                    video_dict = {
														
 
															-                        'video_title': video_title,
														
 
															-                        'video_id': video_id,
														
 
															-                        'gid': gid,
														
 
															-                        'play_cnt': play_cnt,
														
 
															-                        'comment_cnt': comment_cnt,
														
 
															-                        'like_cnt': like_cnt,
														
 
															-                        'share_cnt': share_cnt,
														
 
															-                        'duration': duration,
														
 
															-                        'publish_time_stamp': publish_time_stamp,
														
 
															-                        'publish_time_str': publish_time_str,
														
 
															-                        'user_name': user_name,
														
 
															-                        'user_id': user_id,
														
 
															-                        'avatar_url': avatar_url,
														
 
															-                        'cover_url': cover_url,
														
 
															-                        'session': signature
														
 
															-                    }
														
 
															-                    for k, v in video_dict.items():
														
 
															-                        Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-
														
 
															-                    if gid == 0 or video_id == '' or cover_url == '':
														
 
															-                        Common.logger(log_type, crawler).info('无效视频\n')
														
 
															-                    elif cls.download_rule(video_dict) is False:
														
 
															-                        Common.logger(log_type, crawler).info('不满足抓取规则\n')
														
 
															-                    elif any(str(word) if str(word) in video_title else False for word in cls.xigua_config(log_type, crawler, "filter", env)) is True:
														
 
															-                        Common.logger(log_type, crawler).info('已中过滤词\n')
														
 
															-                    elif cls.repeat_video(log_type, crawler, video_id, env) != 0:
														
 
															-                        Common.logger(log_type, crawler).info('视频已下载\n')
														
 
															-                    else:
														
 
															-                        video_url_dict = cls.get_video_url(log_type, crawler, gid)
														
 
															-                        video_dict['video_url'] = video_url_dict["video_url"]
														
 
															-                        video_dict["audio_url"] = video_url_dict["audio_url"]
														
 
															-                        video_dict["video_width"] = video_url_dict["video_width"]
														
 
															-                        video_dict["video_height"] = video_url_dict["video_height"]
														
 
															-
														
 
															-                        cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
														
 
															-
														
 
															-    @classmethod
														
 
															-    def download_publish(cls, log_type, crawler, video_dict, oss_endpoint, env):
														
 
															-        # 下载视频
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video', title=video_dict['video_title'],
														
 
															-                               url=video_dict['video_url'])
														
 
															-        # 下载音频
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio', title=video_dict['video_title'],
														
 
															-                               url=video_dict['audio_url'])
														
 
															-        # 合成音视频
														
 
															-        Common.video_compose(log_type=log_type, crawler=crawler,
														
 
															-                             video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-        md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
														
 
															-        if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
														
 
															-            # 删除视频文件夹
														
 
															-            shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-            Common.logger(log_type, crawler).info("视频size=0，删除成功\n")
														
 
															-            return
														
 
															-        # 下载封面
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'],
														
 
															-                               url=video_dict['cover_url'])
														
 
															-        # 保存视频信息至txt
														
 
															-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
														
 
															-
														
 
															-        # 上传视频
														
 
															-        Common.logger(log_type, crawler).info("开始上传视频...")
														
 
															-        our_video_id = Publish.upload_and_publish(log_type=log_type,
														
 
															-                                                  crawler=crawler,
														
 
															-                                                  strategy="推荐榜爬虫策略",
														
 
															-                                                  our_uid="recommend",
														
 
															-                                                  env=env,
														
 
															-                                                  oss_endpoint=oss_endpoint)
														
 
															-        if env == 'dev':
														
 
															-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        else:
														
 
															-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        Common.logger(log_type, crawler).info("视频上传完成")
														
 
															-
														
 
															-        if our_video_id is None:
														
 
															-            # 删除视频文件夹
														
 
															-            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-            return
														
 
															-
														
 
															-        # 视频写入飞书
														
 
															-        Feishu.insert_columns(log_type, 'xigua', "1iKGF1", "ROWS", 1, 2)
														
 
															-        upload_time = int(time.time())
														
 
															-        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
														
 
															-                   "推荐榜爬虫策略",
														
 
															-                   video_dict['video_title'],
														
 
															-                   str(video_dict['video_id']),
														
 
															-                   our_video_link,
														
 
															-                   video_dict['gid'],
														
 
															-                   video_dict['play_cnt'],
														
 
															-                   video_dict['comment_cnt'],
														
 
															-                   video_dict['like_cnt'],
														
 
															-                   video_dict['share_cnt'],
														
 
															-                   video_dict['duration'],
														
 
															-                   str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
														
 
															-                   video_dict['publish_time_str'],
														
 
															-                   video_dict['user_name'],
														
 
															-                   video_dict['user_id'],
														
 
															-                   video_dict['avatar_url'],
														
 
															-                   video_dict['cover_url'],
														
 
															-                   video_dict['audio_url'],
														
 
															-                   video_dict['video_url']]]
														
 
															-        time.sleep(1)
														
 
															-        Feishu.update_values(log_type, 'xigua', "1iKGF1", "F2:Z2", values)
														
 
															-        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
														
 
															-
														
 
															-        rule_dict = {
														
 
															-            "play_cnt": {"min": 10000},
														
 
															-            "duration": {"min": 60, "max": 60*30},
														
 
															-            "publish_day": {"min": 30}
														
 
															-        }
														
 
															-
														
 
															-        # 视频信息保存数据库
														
 
															-        insert_sql = f""" insert into crawler_video(video_id,
														
 
															-                                        user_id,
														
 
															-                                        out_user_id,
														
 
															-                                        platform,
														
 
															-                                        strategy,
														
 
															-                                        out_video_id,
														
 
															-                                        video_title,
														
 
															-                                        cover_url,
														
 
															-                                        video_url,
														
 
															-                                        duration,
														
 
															-                                        publish_time,
														
 
															-                                        play_cnt,
														
 
															-                                        crawler_rule,
														
 
															-                                        width,
														
 
															-                                        height)
														
 
															-                                        values({our_video_id},
														
 
															-                                        {int(50322238)},
														
 
															-                                        "{video_dict['user_id']}",
														
 
															-                                        "{cls.platform}",
														
 
															-                                        "推荐榜爬虫策略",
														
 
															-                                        "{video_dict['video_id']}",
														
 
															-                                        "{video_dict['video_title']}",
														
 
															-                                        "{video_dict['cover_url']}",
														
 
															-                                        "{video_dict['video_url']}",
														
 
															-                                        {int(video_dict['duration'])},
														
 
															-                                        "{video_dict['publish_time_str']}",
														
 
															-                                        {int(video_dict['play_cnt'])},
														
 
															-                                        '{json.dumps(rule_dict)}',
														
 
															-                                        {int(video_dict['video_width'])},
														
 
															-                                        {int(video_dict['video_height'])}) """
														
 
															-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
														
 
															-        Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    # XiguaRecommend.get_signature("recommend", "xigua", "dev")
														
 
															-    # XiguaRecommend.get_videolist("recommend", "xigua", "dev")
														
 
															-    # print(XiguaRecommend.get_video_url("recommend", "xigua", "7218171653242094139"))
														
 
															-    # print(XiguaRecommend.filter_words("recommend", "xigua"))
														
 
															-    print(XiguaRecommend.xigua_config("recommend", "xigua", "title", "dev"))
														
 
															-    pass
														
--- a/xigua/xigua_search/xigua_search.py
+++ b/xigua/xigua_search/xigua_search.py
@@ -1,959 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/17
														
 
															-import base64
														
 
															-import json
														
 
															-import os
														
 
															-import random
														
 
															-import shutil
														
 
															-import string
														
 
															-import sys
														
 
															-import time
														
 
															-from hashlib import md5
														
 
															-
														
 
															-import requests
														
 
															-import urllib3
														
 
															-from urllib.parse import quote
														
 
															-from requests.adapters import HTTPAdapter
														
 
															-
														
 
															-
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.db import MysqlHelper
														
 
															-from common.getuser import getUser
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-from common.publish import Publish
														
 
															-from common.public import get_config_from_mysql
														
 
															-from common.userAgent import get_random_user_agent, get_random_header
														
 
															-
														
 
															-
														
 
															-class XiguaSearch:
														
 
															-    platform = "西瓜视频"
														
 
															-    tag = "西瓜视频爬虫,搜索爬虫策略"
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_rule(cls, log_type, crawler):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                rule_sheet = Feishu.get_values_batch(log_type, crawler, "shxOl7")
														
 
															-                if rule_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning("rule_sheet is None! 10秒后重新获取")
														
 
															-                    time.sleep(10)
														
 
															-                    continue
														
 
															-                rule_dict = {
														
 
															-                    "play_cnt": int(rule_sheet[1][2]),
														
 
															-                    "min_duration": int(rule_sheet[2][2]),
														
 
															-                    "max_duration": int(rule_sheet[3][2]),
														
 
															-                    "publish_time": int(rule_sheet[4][2]),
														
 
															-                }
														
 
															-                return rule_dict
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f"get_rule:{e}\n")
														
 
															-
														
 
															-    # 下载规则
														
 
															-    @classmethod
														
 
															-    def download_rule(cls, video_info_dict, rule_dict):
														
 
															-        if video_info_dict['play_cnt'] >= rule_dict['play_cnt']:
														
 
															-            if video_info_dict['comment_cnt'] >= rule_dict['comment_cnt']:
														
 
															-                if video_info_dict['like_cnt'] >= rule_dict['like_cnt']:
														
 
															-                    if video_info_dict['duration'] >= rule_dict['duration']:
														
 
															-                        if video_info_dict['video_width'] >= rule_dict['video_width'] \
														
 
															-                                or video_info_dict['video_height'] >= rule_dict['video_height']:
														
 
															-                            return True
														
 
															-                        else:
														
 
															-                            return False
														
 
															-                    else:
														
 
															-                        return False
														
 
															-                else:
														
 
															-                    return False
														
 
															-            else:
														
 
															-                return False
														
 
															-        else:
														
 
															-            return False
														
 
															-
														
 
															-    # 过滤词库
														
 
															-    @classmethod
														
 
															-    def filter_words(cls, log_type, crawler):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'KGB4Hc')
														
 
															-                if filter_words_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
														
 
															-                    continue
														
 
															-                filter_words_list = []
														
 
															-                for x in filter_words_sheet:
														
 
															-                    for y in x:
														
 
															-                        if y is None:
														
 
															-                            pass
														
 
															-                        else:
														
 
															-                            filter_words_list.append(y)
														
 
															-                return filter_words_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
														
 
															-
														
 
															-    # 获取用户信息（字典格式）. 注意：部分 user_id 字符类型是 int / str
														
 
															-    @classmethod
														
 
															-    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
														
 
															-                if user_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
														
 
															-                    continue
														
 
															-                our_user_list = []
														
 
															-                for i in range(1, len(user_sheet)):
														
 
															-                    our_uid = user_sheet[i][6]
														
 
															-                    search_word = user_sheet[i][4]
														
 
															-                    tag1 = user_sheet[i][8]
														
 
															-                    tag2 = user_sheet[i][9]
														
 
															-                    tag3 = user_sheet[i][10]
														
 
															-                    tag4 = user_sheet[i][11]
														
 
															-                    tag5 = user_sheet[i][12]
														
 
															-                    tag6 = user_sheet[i][13]
														
 
															-                    tag7 = user_sheet[i][14]
														
 
															-                    Common.logger(log_type, crawler).info(f"正在更新 {search_word} 关键词信息\n")
														
 
															-                    if our_uid is None:
														
 
															-                        default_user = getUser.get_default_user()
														
 
															-                        # 用来创建our_id的信息
														
 
															-                        user_dict = {
														
 
															-                            'recommendStatus': -6,
														
 
															-                            'appRecommendStatus': -6,
														
 
															-                            'nickName': default_user['nickName'],
														
 
															-                            'avatarUrl': default_user['avatarUrl'],
														
 
															-                            'tagName': f'{tag1},{tag2},{tag3},{tag4},{tag5},{tag6},{tag7}',
														
 
															-                        }
														
 
															-                        Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
														
 
															-                        our_uid = getUser.create_uid(log_type, crawler, user_dict, env)
														
 
															-                        if env == 'prod':
														
 
															-                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
														
 
															-                        else:
														
 
															-                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
														
 
															-                        Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
														
 
															-                                             [[our_uid, our_user_link]])
														
 
															-                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功！\n')
														
 
															-                    our_user_dict = {
														
 
															-                        'out_uid': '',
														
 
															-                        'search_word': search_word,
														
 
															-                        'our_uid': our_uid,
														
 
															-                        'our_user_link': f'https://admin.piaoquantv.com/ums/user/{our_uid}/post',
														
 
															-                    }
														
 
															-                    our_user_list.append(our_user_dict)
														
 
															-
														
 
															-                return our_user_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'get_user_id_from_feishu异常:{e}\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def random_signature(cls):
														
 
															-        src_digits = string.digits  # string_数字
														
 
															-        src_uppercase = string.ascii_uppercase  # string_大写字母
														
 
															-        src_lowercase = string.ascii_lowercase  # string_小写字母
														
 
															-        digits_num = random.randint(1, 6)
														
 
															-        uppercase_num = random.randint(1, 26 - digits_num - 1)
														
 
															-        lowercase_num = 26 - (digits_num + uppercase_num)
														
 
															-        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + random.sample(
														
 
															-            src_lowercase, lowercase_num)
														
 
															-        random.shuffle(password)
														
 
															-        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
														
 
															-        new_password_start = new_password[0:18]
														
 
															-        new_password_end = new_password[-7:]
														
 
															-        if new_password[18] == '8':
														
 
															-            new_password = new_password_start + 'w' + new_password_end
														
 
															-        elif new_password[18] == '9':
														
 
															-            new_password = new_password_start + 'x' + new_password_end
														
 
															-        elif new_password[18] == '-':
														
 
															-            new_password = new_password_start + 'y' + new_password_end
														
 
															-        elif new_password[18] == '.':
														
 
															-            new_password = new_password_start + 'z' + new_password_end
														
 
															-        else:
														
 
															-            new_password = new_password_start + 'y' + new_password_end
														
 
															-        return new_password
														
 
															-
														
 
															-    # 获取视频详情
														
 
															-    @classmethod
														
 
															-    def get_video_url(cls, log_type, crawler, gid):
														
 
															-        try:
														
 
															-            url = 'https://www.ixigua.com/api/mixVideo/information?'
														
 
															-            headers = {
														
 
															-                "accept-encoding": "gzip, deflate",
														
 
															-                "accept-language": "zh-CN,zh-Hans;q=0.9",
														
 
															-                "user-agent": get_random_user_agent('pc'),
														
 
															-                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
														
 
															-            }
														
 
															-            params = {
														
 
															-                'mixId': gid,
														
 
															-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
														
 
															-                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
														
 
															-                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
														
 
															-                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
														
 
															-                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
														
 
															-            }
														
 
															-            cookies = {
														
 
															-                'ixigua-a-s': '1',
														
 
															-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
														
 
															-                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
														
 
															-                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
														
 
															-                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
														
 
															-                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
														
 
															-                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
														
 
															-                '__ac_nonce': '06304878000964fdad287',
														
 
															-                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
														
 
															-                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
														
 
															-                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
														
 
															-                '_tea_utm_cache_1300': 'undefined',
														
 
															-                'support_avif': 'false',
														
 
															-                'support_webp': 'false',
														
 
															-                'xiguavideopcwebid': '7134967546256016900',
														
 
															-                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
														
 
															-            }
														
 
															-            urllib3.disable_warnings()
														
 
															-            s = requests.session()
														
 
															-            # max_retries=3 重试3次
														
 
															-            s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-            s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-            response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False,
														
 
															-                             proxies=Common.tunnel_proxies(), timeout=5)
														
 
															-            # response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False)
														
 
															-            response.close()
														
 
															-            if 'data' not in response.json() or response.json()['data'] == '':
														
 
															-                Common.logger(log_type, crawler).warning('get_video_info: response: {}', response)
														
 
															-            else:
														
 
															-                video_info = response.json()['data']['gidInformation']['packerData']['video']
														
 
															-                video_url_dict = {}
														
 
															-                # video_url
														
 
															-                if 'videoResource' not in video_info:
														
 
															-                    video_url_dict["video_url"] = ''
														
 
															-                    video_url_dict["audio_url"] = ''
														
 
															-                    video_url_dict["video_width"] = 0
														
 
															-                    video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'dash_120fps' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['dash_120fps'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
														
 
															-                            and len(
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(
														
 
															-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                                'backup_url_1']
														
 
															-                        audio_url = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                                'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                                'vwidth']
														
 
															-                        video_height = \
														
 
															-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                                'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'dash' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['dash'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_4']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_3']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_2']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['dash']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['video_list']['video_1']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['dash'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['dash']['dynamic_video'] \
														
 
															-                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vwidth']
														
 
															-                        video_height = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                elif 'normal' in video_info['videoResource']:
														
 
															-                    if "video_list" in video_info['videoResource']['normal'] and 'video_4' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_4']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_4']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_3' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_3']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_3']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_2' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_2']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_2']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_1' in \
														
 
															-                            video_info['videoResource']['normal']['video_list']:
														
 
															-                        video_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['video_list']['video_1']['vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['video_list']['video_1']['vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-                    elif 'dynamic_video' in video_info['videoResource']['normal'] \
														
 
															-                            and 'dynamic_video_list' in video_info['videoResource']['normal']['dynamic_video'] \
														
 
															-                            and 'dynamic_audio_list' in video_info['videoResource']['normal']['dynamic_video'] \
														
 
															-                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                        video_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        audio_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                            'backup_url_1']
														
 
															-                        if len(video_url) % 3 == 1:
														
 
															-                            video_url += '=='
														
 
															-                        elif len(video_url) % 3 == 2:
														
 
															-                            video_url += '='
														
 
															-                        elif len(audio_url) % 3 == 1:
														
 
															-                            audio_url += '=='
														
 
															-                        elif len(audio_url) % 3 == 2:
														
 
															-                            audio_url += '='
														
 
															-                        video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                        audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                        video_width = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vwidth']
														
 
															-                        video_height = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                            'vheight']
														
 
															-                        video_url_dict["video_url"] = video_url
														
 
															-                        video_url_dict["audio_url"] = audio_url
														
 
															-                        video_url_dict["video_width"] = video_width
														
 
															-                        video_url_dict["video_height"] = video_height
														
 
															-                    else:
														
 
															-                        video_url_dict["video_url"] = ''
														
 
															-                        video_url_dict["audio_url"] = ''
														
 
															-                        video_url_dict["video_width"] = 0
														
 
															-                        video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                else:
														
 
															-                    video_url_dict["video_url"] = ''
														
 
															-                    video_url_dict["audio_url"] = ''
														
 
															-                    video_url_dict["video_width"] = 0
														
 
															-                    video_url_dict["video_height"] = 0
														
 
															-
														
 
															-                return video_url_dict
														
 
															-
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'get_video_url:{e}\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_video_info(cls, log_type, crawler, item_id):
														
 
															-        d_url = "http://a6.pstatp.com/article/full/11/1/{video_id}/{video_id}/1/0/?iid=3636030325&device_id=5787057242" \
														
 
															-                "&ac=wifi&channel=wandoujia&aid=13&app_name=news_article&version_code=532&version_name=5.3.2&device_platform" \
														
 
															-                "=android&ab_client=a1%2Cc2%2Ce1%2Cf2%2Cg2%2Cb3%2Cf4&abflag=3&ssmix=a&device_type=SM705" \
														
 
															-                "&device_brand=smartisan&os_api=19&os_version=4.4.2&uuid=864593021012562&openudid=e23a5ff037ef2d1a" \
														
 
															-                "&manifest_version_code=532&resolution=1080*1920&dpi=480&update_version_code=5320".format(
														
 
															-            video_id=item_id)
														
 
															-        res = requests.get(url=d_url, headers=get_random_header('pc'), proxies=Common.tunnel_proxies())
														
 
															-        data = json.loads(res.text)['data']
														
 
															-        item_counter = data['h5_extra']['itemCell']['itemCounter']
														
 
															-        user_info = data['user_info']
														
 
															-        detail_info = data['video_detail_info']
														
 
															-        video_dict = {'video_title': data['title'].replace('"', '').replace("'", ''),
														
 
															-                      'video_id': detail_info['video_id'],
														
 
															-                      'gid': data['group_id'],
														
 
															-                      'play_cnt': item_counter['videoWatchCount'],
														
 
															-                      'comment_cnt': item_counter['commentCount'],
														
 
															-                      'like_cnt': item_counter['diggCount'],
														
 
															-                      'share_cnt': item_counter['shareCount'],
														
 
															-
														
 
															-                      'duration': data['video_duration'],
														
 
															-                      'publish_time_stamp': data['publish_time'],
														
 
															-                      'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S",
														
 
															-                                                        time.localtime(data['publish_time'])),
														
 
															-                      'user_name': user_info['name'],
														
 
															-                      'user_id': user_info['user_id'],
														
 
															-                      'avatar_url': user_info['avatar_url'],
														
 
															-                      'cover_url': data['large_image']['url'].replace('\u0026', '&'),
														
 
															-
														
 
															-                      }
														
 
															-        return video_dict
														
 
															-
														
 
															-    @classmethod
														
 
															-    def is_ruled(cls, log_type, crawler, video_dict, rule_dict):
														
 
															-        old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
														
 
															-        if video_dict['publish_time_stamp'] <= old_time:
														
 
															-            return False
														
 
															-        elif video_dict['play_cnt'] <= rule_dict['play_cnt']:
														
 
															-            return False
														
 
															-        elif video_dict['duration'] < rule_dict['min_duration'] or video_dict['duration'] > rule_dict['max_duration']:
														
 
															-            return False
														
 
															-        else:
														
 
															-            return True
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_videolist(cls, log_type, crawler, strategy, our_uid, search_word, oss_endpoint, env, machine):
														
 
															-        total_count = 1
														
 
															-        offset = 0
														
 
															-        while True:
														
 
															-
														
 
															-            signature = cls.random_signature()
														
 
															-            # url = "https://www.ixigua.com/api/searchv2/complex/{}/{}?order_type=publish_time&click_position=new".format(
														
 
															-            #     quote(search_word), offset, signature)
														
 
															-            url = f'https://www.ixigua.com/api/searchv2/complex/{quote(search_word)}/{offset}?' \
														
 
															-                  f'search_id=202305111126371489381ECEC7FE277E3F&' \
														
 
															-                  f'aid=1768&' \
														
 
															-                  f'msToken=lPfIf3aps6EktQAeOl9yRgnL44MtMeGt2WnHjahIR0IysASB_zdhGiY0J9WWxNDpLd7aVdQx_36MpyPI5f2zRUHFYyNNsX5cl-or6GkiVuLLiRsU3ylxj9vt7Upubw==&' \
														
 
															-                  f'X-Bogus=DFSzswVY4h0ANGD7tC7G/Mm4pIkV&' \
														
 
															-                  f'_signature={signature}'
														
 
															-
														
 
															-            headers = {
														
 
															-                'referer': 'https://www.ixigua.com/search/{}/?logTag=594535e3690f17a88cdb&tab_name=search'.format(
														
 
															-                    quote(search_word)),
														
 
															-                'cookie': 'ttcid=5d8f917a525e46759dc886296bf1111b69; MONITOR_WEB_ID=ad1c8360-d4c9-4fa2-a801-d9fd68dfc1b2; s_v_web_id=verify_lh8vaa6v_VI4RQ0ET_nVbq_4PXw_8mfN_7Xp6wdLOZi08; passport_csrf_token=0e7c6992cb6170c9db034c3696191fff; passport_csrf_token_default=0e7c6992cb6170c9db034c3696191fff; support_webp=true; support_avif=true; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; odin_tt=3072e827705bd5aa707fb8d432524d7f8fad972b02b31a2d3458a3e5209d5492; sid_guard=46a52ce83dacb0b871dae675476a3e42%7C1683773717%7C21600%7CThu%2C+11-May-2023+08%3A55%3A17+GMT; uid_tt=4126f296856e6042f195253e9a01c4cb; uid_tt_ss=4126f296856e6042f195253e9a01c4cb; sid_tt=46a52ce83dacb0b871dae675476a3e42; sessionid=46a52ce83dacb0b871dae675476a3e42; sessionid_ss=46a52ce83dacb0b871dae675476a3e42; sid_ucp_v1=1.0.0-KDMyMzg5NWI3YzAxMGFkN2Y4MjZiMzE5Njc0MGFmMWQ5NGExY2MyYzgKCBCVsvGiBhgNGgJobCIgNDZhNTJjZTgzZGFjYjBiODcxZGFlNjc1NDc2YTNlNDI; ssid_ucp_v1=1.0.0-KDMyMzg5NWI3YzAxMGFkN2Y4MjZiMzE5Njc0MGFmMWQ5NGExY2MyYzgKCBCVsvGiBhgNGgJobCIgNDZhNTJjZTgzZGFjYjBiODcxZGFlNjc1NDc2YTNlNDI; ixigua-a-s=1; tt_scid=sblZQP6nSw2f6A.XS-yHFqB.R3o9UFsRTUCKAoWlHWzNrOf8R01qeIBbu6TDeXtMa3fb; ttwid=1%7C4zaTJmlaHpEa8rAB-KjREdxT3sNBUJWrAzRJnNvqExQ%7C1683775619%7Cf4fc6fa51baf2e302242da412ead6500c3d3f5bfb0be6253cbae00301d5773ae; msToken=lPfIf3aps6EktQAeOl9yRgnL44MtMeGt2WnHjahIR0IysASB_zdhGiY0J9WWxNDpLd7aVdQx_36MpyPI5f2zRUHFYyNNsX5cl-or6GkiVuLLiRsU3ylxj9vt7Upubw==',
														
 
															-                'user-agent': get_random_user_agent('pc'),
														
 
															-            }
														
 
															-            try:
														
 
															-                proxies = Common.tunnel_proxies()
														
 
															-                s = requests.session()
														
 
															-                # max_retries=3 重试3次
														
 
															-                s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-                s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-                res = s.request("GET", url, headers=headers, proxies=proxies, timeout=5)
														
 
															-                # Common.logger(log_type, crawler).info(f"proxies:{proxies}\n")
														
 
															-                Common.logger(log_type, crawler).info(f"get_videolist:{res.json()}\n")
														
 
															-                search_list = res.json()['data']['data']
														
 
															-            except Exception as e:
														
 
															-                Common.logger(log_type, crawler).warning(f"get_videolist:{e}\n")
														
 
															-                continue
														
 
															-            if not search_list:
														
 
															-                Common.logger(log_type, crawler).error(f'关键词:{search_word},没有获取到视频列表:offset{offset}')
														
 
															-                return
														
 
															-            for video_info in search_list:
														
 
															-                v_type = video_info['type']
														
 
															-                rule_dict = cls.get_rule(log_type, crawler)
														
 
															-                publish_time = video_info['data']['publish_time']
														
 
															-                old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
														
 
															-                if publish_time <= old_time:
														
 
															-                    Common.logger(log_type, crawler).error(f'关键词:{search_word},抓取完毕，退出抓取\n')
														
 
															-                    return
														
 
															-
														
 
															-                if v_type == 'video':
														
 
															-                    item_id = video_info['data']['group_id']
														
 
															-                    if video_info['data']['publish_time'] <= old_time:
														
 
															-                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
														
 
															-                        continue
														
 
															-                    elif video_info['data']['video_watch_count'] <= rule_dict['play_cnt']:
														
 
															-                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
														
 
															-                        continue
														
 
															-                    elif video_info['data']['video_time'] < rule_dict['min_duration'] or video_info['data'][
														
 
															-                        'video_time'] > rule_dict['max_duration']:
														
 
															-                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
														
 
															-                        continue
														
 
															-
														
 
															-
														
 
															-                    try:
														
 
															-                        video_dict = cls.get_video_info(log_type, crawler, item_id)
														
 
															-                        filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
														
 
															-                        is_filter = False
														
 
															-                        for filter_word in filter_words:
														
 
															-                            if filter_word in video_dict['video_title']:
														
 
															-                                is_filter = True
														
 
															-                                break
														
 
															-                        if is_filter:
														
 
															-                            Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
														
 
															-                            continue
														
 
															-                        video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
														
 
															-                        video_dict['video_width'] = video_url_dict["video_width"]
														
 
															-                        video_dict['video_height'] = video_url_dict["video_height"]
														
 
															-                        video_dict['audio_url'] = video_url_dict["audio_url"]
														
 
															-                        video_dict['video_url'] = video_url_dict["video_url"]
														
 
															-                        video_dict['session'] = signature
														
 
															-                    except Exception as e:
														
 
															-                        Common.logger(log_type, crawler).error(
														
 
															-                            f'关键词:{search_word},视频:{item_id},获取详情失败,原因:{e}')
														
 
															-                        continue
														
 
															-
														
 
															-                    if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
														
 
															-                        Common.logger(log_type, crawler).info(
														
 
															-                            f'关键词:{search_word},gid:{video_dict["gid"]},视频已下载,无需重复下载\n')
														
 
															-                        continue
														
 
															-                    for k, v in video_dict.items():
														
 
															-                        Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-
														
 
															-                    try:
														
 
															-                        # print(
														
 
															-                        #     f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
														
 
															-                        cls.download_publish(
														
 
															-                            search_word=search_word,
														
 
															-                            log_type=log_type,
														
 
															-                            crawler=crawler,
														
 
															-                            video_dict=video_dict,
														
 
															-                            rule_dict=rule_dict,
														
 
															-                            strategy=strategy,
														
 
															-                            our_uid=our_uid,
														
 
															-                            oss_endpoint=oss_endpoint,
														
 
															-                            env=env,
														
 
															-                            machine=machine
														
 
															-                        )
														
 
															-
														
 
															-                    except Exception as e:
														
 
															-                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},下载失败,原因:{e}')
														
 
															-                        continue
														
 
															-                    total_count += 1
														
 
															-                    Common.logger(log_type, crawler).info(
														
 
															-                        f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
														
 
															-                    if total_count >= 30:
														
 
															-                        return
														
 
															-                # elif v_type == 'pseries':
														
 
															-                #     try:
														
 
															-                #         item_id = video_info['data']['group_id']
														
 
															-                #         p_url = "https://www.ixigua.com/api/videov2/pseries_more_v2?pSeriesId={}&rank=0&tailCount=30&aid=1768&msToken=wHEafKFLx0k3hihOPbhXYNsfMBxWiq2AB0K5R-34kEFixyq3ATi_DuXbL4Q47J9C2uK2zgWItMa1g2yc4FyDxM4dMijmSdwF4c4T8sSmOkoOI0wGzeEcPw==&X-Bogus=DFSzswVOzdUANG3ItaVHYr7TlqCv&_signature=_02B4Z6wo00001vB6l3QAAIDBZKzMeTihTmbwepPAANgh1Ai3JgFFo4e6anoezmBEpHfEMEYlWISGhXI-QKfev4N-2bwgXsHOuNGLnOsGqMbANIjFPh7Yj6OakQWrkbACenlv0P-arswtB6Zn45".format(
														
 
															-                #             item_id)
														
 
															-                #         p_headers = {
														
 
															-                #             'referer': 'https://www.ixigua.com/{}?series_flow=1&logTag=cfec9d927da968feff89'.format(
														
 
															-                #                 item_id),
														
 
															-                #             'user-agent': get_random_user_agent('pc'),
														
 
															-                #         }
														
 
															-                #         p_res = requests.request("GET", p_url, headers=p_headers,
														
 
															-                #                                  proxies=Common.tunnel_proxies()).json()
														
 
															-                #     except Exception as e:
														
 
															-                #         Common.logger(log_type, crawler).error(f'合集:{item_id},没有获取到合集详情,原因:{e}')
														
 
															-                #         continue
														
 
															-                #     for video in p_res['data']:
														
 
															-                #         item_id = video['item_id']
														
 
															-                #         try:
														
 
															-                #             video_dict = cls.get_video_info(log_type, crawler, item_id)
														
 
															-                #             video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
														
 
															-                #             video_dict['video_width'] = video_url_dict["video_width"]
														
 
															-                #             video_dict['video_height'] = video_url_dict["video_height"]
														
 
															-                #             video_dict['audio_url'] = video_url_dict["audio_url"]
														
 
															-                #             video_dict['video_url'] = video_url_dict["video_url"]
														
 
															-                #             video_dict['session'] = signature
														
 
															-                #         except Exception as e:
														
 
															-                #             Common.logger(log_type, crawler).error(f'视频:{item_id},没有获取到视频详情,原因:{e}')
														
 
															-                #             continue
														
 
															-                #         if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
														
 
															-                #             Common.logger(log_type, crawler).info(
														
 
															-                #                 f'gid:{video_dict["gid"]},视频已下载,无需重复下载\n')
														
 
															-                #             continue
														
 
															-                #         if not cls.is_ruled(log_type, crawler, video_dict, rule_dict):
														
 
															-                #             Common.logger(log_type, crawler).error(f'视频:{item_id},不符合抓取规则\n')
														
 
															-                #             continue
														
 
															-                #         for k, v in video_dict.items():
														
 
															-                #             Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-                #         try:
														
 
															-                #             # print(
														
 
															-                #             #     f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
														
 
															-                #             cls.download_publish(
														
 
															-                #                 search_word=search_word,
														
 
															-                #                 log_type=log_type,
														
 
															-                #                 crawler=crawler,
														
 
															-                #                 video_dict=video_dict,
														
 
															-                #                 rule_dict=rule_dict,
														
 
															-                #                 strategy=strategy,
														
 
															-                #                 our_uid=our_uid,
														
 
															-                #                 oss_endpoint=oss_endpoint,
														
 
															-                #                 env=env,
														
 
															-                #                 machine=machine
														
 
															-                #             )
														
 
															-                #             total_count += 1
														
 
															-                #             if total_count >= 30:
														
 
															-                #                 return
														
 
															-                #             else:
														
 
															-                #                 break
														
 
															-                #         except Exception as e:
														
 
															-                #             Common.logger(log_type, crawler).error(f'视频:{item_id},download_publish异常:{e}\n')
														
 
															-
														
 
															-            offset += 10
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_video(cls, log_type, crawler, video_id, env, machine):
														
 
															-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    # 下载 / 上传
														
 
															-    @classmethod
														
 
															-    def download_publish(cls, log_type, crawler, search_word, strategy, video_dict, rule_dict, our_uid, oss_endpoint,
														
 
															-                         env, machine):
														
 
															-
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
														
 
															-                               title=video_dict['video_title'], url=video_dict['video_url'])
														
 
															-        # 下载音频
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
														
 
															-                               title=video_dict['video_title'], url=video_dict['audio_url'])
														
 
															-        # 合成音视频
														
 
															-        Common.video_compose(log_type=log_type, crawler=crawler,
														
 
															-                             video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-        md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
														
 
															-        if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
														
 
															-            # 删除视频文件夹
														
 
															-            shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-            Common.logger(log_type, crawler).info("视频size=0，删除成功\n")
														
 
															-            return
														
 
															-        # ffmpeg_dict = Common.ffmpeg(log_type, crawler,
														
 
															-        #                             f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
														
 
															-        # if ffmpeg_dict is None or ffmpeg_dict['size'] == 0:
														
 
															-        #     Common.logger(log_type, crawler).warning(f"下载的视频无效，已删除\n")
														
 
															-        #     # 删除视频文件夹
														
 
															-        #     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-        #     return
														
 
															-        # 下载封面
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='cover',
														
 
															-                               title=video_dict['video_title'], url=video_dict['cover_url'])
														
 
															-        # 保存视频信息至txt
														
 
															-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
														
 
															-
														
 
															-        # 上传视频
														
 
															-        Common.logger(log_type, crawler).info("开始上传视频...")
														
 
															-        our_video_id = Publish.upload_and_publish(log_type=log_type,
														
 
															-                                                  crawler=crawler,
														
 
															-                                                  strategy=strategy,
														
 
															-                                                  our_uid=our_uid,
														
 
															-                                                  env=env,
														
 
															-                                                  oss_endpoint=oss_endpoint)
														
 
															-        if env == 'dev':
														
 
															-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        else:
														
 
															-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        Common.logger(log_type, crawler).info("视频上传完成")
														
 
															-
														
 
															-        if our_video_id is None:
														
 
															-            # 删除视频文件夹
														
 
															-            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-            return
														
 
															-
														
 
															-        # 视频写入飞书
														
 
															-        Feishu.insert_columns(log_type, 'xigua', "BUNvGC", "ROWS", 1, 2)
														
 
															-        upload_time = int(time.time())
														
 
															-        values = [[
														
 
															-            search_word,
														
 
															-            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
														
 
															-            "关键词搜索",
														
 
															-            video_dict['video_title'],
														
 
															-            str(video_dict['video_id']),
														
 
															-            our_video_link,
														
 
															-            video_dict['gid'],
														
 
															-            video_dict['play_cnt'],
														
 
															-            video_dict['comment_cnt'],
														
 
															-            video_dict['like_cnt'],
														
 
															-            video_dict['share_cnt'],
														
 
															-            video_dict['duration'],
														
 
															-            str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
														
 
															-            video_dict['publish_time_str'],
														
 
															-            video_dict['user_name'],
														
 
															-            video_dict['user_id'],
														
 
															-            video_dict['avatar_url'],
														
 
															-            video_dict['cover_url'],
														
 
															-            video_dict['video_url'],
														
 
															-            video_dict['audio_url']]]
														
 
															-        time.sleep(1)
														
 
															-        Feishu.update_values(log_type, 'xigua', "BUNvGC", "E2:Z2", values)
														
 
															-        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
														
 
															-
														
 
															-        # 视频信息保存数据库
														
 
															-        insert_sql = f""" insert into crawler_video(video_id,
														
 
															-                                user_id,
														
 
															-                                out_user_id,
														
 
															-                                platform,
														
 
															-                                strategy,
														
 
															-                                out_video_id,
														
 
															-                                video_title,
														
 
															-                                cover_url,
														
 
															-                                video_url,
														
 
															-                                duration,
														
 
															-                                publish_time,
														
 
															-                                play_cnt,
														
 
															-                                crawler_rule,
														
 
															-                                width,
														
 
															-                                height)
														
 
															-                                values({our_video_id},
														
 
															-                                {our_uid},
														
 
															-                                "{video_dict['user_id']}",
														
 
															-                                "{cls.platform}",
														
 
															-                                "搜索爬虫策略",
														
 
															-                                "{video_dict['video_id']}",
														
 
															-                                "{video_dict['video_title']}",
														
 
															-                                "{video_dict['cover_url']}",
														
 
															-                                "{video_dict['video_url']}",
														
 
															-                                {int(video_dict['duration'])},
														
 
															-                                "{video_dict['publish_time_str']}",
														
 
															-                                {int(video_dict['play_cnt'])},
														
 
															-                                '{json.dumps(rule_dict)}',
														
 
															-                                {int(video_dict['video_width'])},
														
 
															-                                {int(video_dict['video_height'])}) """
														
 
															-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-        MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
														
 
															-        Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_search_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
														
 
															-        user_list = cls.get_user_list(log_type=log_type, crawler=crawler, sheetid="SSPNPW", env=env,
														
 
															-                                      machine=machine)
														
 
															-        for user in user_list:
														
 
															-            try:
														
 
															-                search_word = user["search_word"]
														
 
															-                our_uid = user["our_uid"]
														
 
															-                Common.logger(log_type, crawler).info(f"开始抓取 {search_word} 用户主页视频\n")
														
 
															-                cls.get_videolist(log_type=log_type,
														
 
															-                                  crawler=crawler,
														
 
															-                                  strategy=strategy,
														
 
															-                                  our_uid=our_uid,
														
 
															-                                  search_word=search_word,
														
 
															-                                  oss_endpoint=oss_endpoint,
														
 
															-                                  env=env,
														
 
															-                                  machine=machine)
														
 
															-            except Exception as e:
														
 
															-                Common.logger(log_type, crawler).error(f"get_search_videos:{e}\n")
														
 
															-
														
 
															-
														
 
															-if __name__ == '__main__':
														
 
															-    XiguaSearch.get_search_videos('search', 'xigua', 'xigua_search', 'out', 'dev', 'aliyun')
														
 
															-
														
--- a/xigua/xigua_search/xigua_search_new.py
+++ b/xigua/xigua_search/xigua_search_new.py
@@ -1,904 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2023/2/17
														
 
															-import base64
														
 
															-import json
														
 
															-import os
														
 
															-import random
														
 
															-import shutil
														
 
															-import string
														
 
															-import sys
														
 
															-import time
														
 
															-from hashlib import md5
														
 
															-import requests
														
 
															-import urllib3
														
 
															-from requests.adapters import HTTPAdapter
														
 
															-from selenium.webdriver import DesiredCapabilities
														
 
															-from selenium.webdriver.chrome.service import Service
														
 
															-from selenium import webdriver
														
 
															-from selenium.webdriver.common.by import By
														
 
															-sys.path.append(os.getcwd())
														
 
															-from common.scheduling_db import MysqlHelper
														
 
															-from common.getuser import getUser
														
 
															-from common.common import Common
														
 
															-from common.feishu import Feishu
														
 
															-from common.publish import Publish
														
 
															-from common.public import get_config_from_mysql
														
 
															-from common.userAgent import get_random_user_agent
														
 
															-
														
 
															-
														
 
															-class XiguaSearchNew:
														
 
															-    # 抓取视频数
														
 
															-    i = 0
														
 
															-    # 已下载视频数
														
 
															-    videos_cnt = 0
														
 
															-    platform = "西瓜视频"
														
 
															-    tag = "西瓜视频爬虫,搜索爬虫策略"
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_rule_dict(cls, log_type, crawler):
														
 
															-        while True:
														
 
															-            rule_sheet = Feishu.get_values_batch(log_type, crawler, "shxOl7")
														
 
															-            if rule_sheet is None:
														
 
															-                Common.logger(log_type, crawler).info(f"get_rule:{rule_sheet}，2秒钟后重试")
														
 
															-                time.sleep(2)
														
 
															-                continue
														
 
															-            rule_dict = {
														
 
															-                "play_cnt": int(rule_sheet[1][2]),
														
 
															-                "duration_min": int(rule_sheet[2][2]),
														
 
															-                "duration_max": int(rule_sheet[3][2]),
														
 
															-                "publish_time": int(rule_sheet[4][2]),
														
 
															-                "like_cnt": int(rule_sheet[5][2]),
														
 
															-                "comment_cnt": int(rule_sheet[6][2])
														
 
															-            }
														
 
															-            return rule_dict
														
 
															-
														
 
															-    # 下载规则
														
 
															-    @classmethod
														
 
															-    def download_rule(cls, log_type, crawler, video_dict, rule_dict):
														
 
															-        Common.logger(log_type, crawler).info(f'play_cnt: {video_dict["play_cnt"]} >= {rule_dict["play_cnt"]}')
														
 
															-        Common.logger(log_type, crawler).info(f'duration: {rule_dict["duration_max"]} >= {video_dict["duration"]} >= {rule_dict["duration_min"]}')
														
 
															-        Common.logger(log_type, crawler).info(f'publish_time: {int(time.time())} - {video_dict["publish_time_stamp"]} = {int(time.time())-video_dict["publish_time_stamp"]} <= {rule_dict["publish_time"] * 3600 * 24}')
														
 
															-        Common.logger(log_type, crawler).info(f'like_cnt: {video_dict["like_cnt"]} >= {rule_dict["like_cnt"]}')
														
 
															-        Common.logger(log_type, crawler).info(f'comment_cnt: {video_dict["comment_cnt"]} >= {rule_dict["comment_cnt"]}')
														
 
															-        if video_dict["play_cnt"] >= rule_dict["play_cnt"] \
														
 
															-            and rule_dict["duration_max"] >= video_dict["duration"] >= rule_dict["duration_min"] \
														
 
															-            and int(time.time()) - video_dict["publish_time_stamp"] <= rule_dict["publish_time"]*3600*24 \
														
 
															-            and video_dict["like_cnt"] >= rule_dict["like_cnt"] \
														
 
															-            and video_dict["comment_cnt"] >= rule_dict["comment_cnt"]:
														
 
															-            return True
														
 
															-        else:
														
 
															-            return False
														
 
															-
														
 
															-    # 过滤词库
														
 
															-    @classmethod
														
 
															-    def filter_words(cls, log_type, crawler, env):
														
 
															-        filter_words_list = get_config_from_mysql(log_type, crawler, env, "filter")
														
 
															-        return filter_words_list
														
 
															-
														
 
															-    # 获取用户信息（字典格式）. 注意：部分 user_id 字符类型是 int / str
														
 
															-    @classmethod
														
 
															-    def get_user_list(cls, log_type, crawler, sheetid, env):
														
 
															-        try:
														
 
															-            while True:
														
 
															-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
														
 
															-                if user_sheet is None:
														
 
															-                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
														
 
															-                    continue
														
 
															-                our_user_list = []
														
 
															-                for i in range(1, len(user_sheet)):
														
 
															-                    our_uid = user_sheet[i][6]
														
 
															-                    search_word = user_sheet[i][4]
														
 
															-                    tag1 = user_sheet[i][8]
														
 
															-                    tag2 = user_sheet[i][9]
														
 
															-                    tag3 = user_sheet[i][10]
														
 
															-                    tag4 = user_sheet[i][11]
														
 
															-                    tag5 = user_sheet[i][12]
														
 
															-                    tag6 = user_sheet[i][13]
														
 
															-                    tag7 = user_sheet[i][14]
														
 
															-                    Common.logger(log_type, crawler).info(f"正在更新 {search_word} 关键词信息\n")
														
 
															-                    if our_uid is None:
														
 
															-                        default_user = getUser.get_default_user()
														
 
															-                        # 用来创建our_id的信息
														
 
															-                        user_dict = {
														
 
															-                            'recommendStatus': -6,
														
 
															-                            'appRecommendStatus': -6,
														
 
															-                            'nickName': default_user['nickName'],
														
 
															-                            'avatarUrl': default_user['avatarUrl'],
														
 
															-                            'tagName': f'{tag1},{tag2},{tag3},{tag4},{tag5},{tag6},{tag7}',
														
 
															-                        }
														
 
															-                        Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
														
 
															-                        our_uid = getUser.create_uid(log_type, crawler, user_dict, env)
														
 
															-                        if env == 'prod':
														
 
															-                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
														
 
															-                        else:
														
 
															-                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
														
 
															-                        Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
														
 
															-                                             [[our_uid, our_user_link]])
														
 
															-                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功！\n')
														
 
															-                    our_user_dict = {
														
 
															-                        'out_uid': '',
														
 
															-                        'search_word': search_word,
														
 
															-                        'our_uid': our_uid,
														
 
															-                        'our_user_link': f'https://admin.piaoquantv.com/ums/user/{our_uid}/post',
														
 
															-                    }
														
 
															-                    our_user_list.append(our_user_dict)
														
 
															-
														
 
															-                return our_user_list
														
 
															-        except Exception as e:
														
 
															-            Common.logger(log_type, crawler).error(f'get_user_id_from_feishu异常:{e}\n')
														
 
															-
														
 
															-    @classmethod
														
 
															-    def videos_cnt_rule(cls, log_type, crawler):
														
 
															-        while True:
														
 
															-            videos_cnt_sheet = Feishu.get_values_batch(log_type, crawler, "shxOl7")
														
 
															-            if videos_cnt_sheet is None:
														
 
															-                time.sleep(2)
														
 
															-                continue
														
 
															-            return int(videos_cnt_sheet[7][2])
														
 
															-
														
 
															-    @classmethod
														
 
															-    def random_signature(cls):
														
 
															-        src_digits = string.digits  # string_数字
														
 
															-        src_uppercase = string.ascii_uppercase  # string_大写字母
														
 
															-        src_lowercase = string.ascii_lowercase  # string_小写字母
														
 
															-        digits_num = random.randint(1, 6)
														
 
															-        uppercase_num = random.randint(1, 26 - digits_num - 1)
														
 
															-        lowercase_num = 26 - (digits_num + uppercase_num)
														
 
															-        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + random.sample(
														
 
															-            src_lowercase, lowercase_num)
														
 
															-        random.shuffle(password)
														
 
															-        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
														
 
															-        new_password_start = new_password[0:18]
														
 
															-        new_password_end = new_password[-7:]
														
 
															-        if new_password[18] == '8':
														
 
															-            new_password = new_password_start + 'w' + new_password_end
														
 
															-        elif new_password[18] == '9':
														
 
															-            new_password = new_password_start + 'x' + new_password_end
														
 
															-        elif new_password[18] == '-':
														
 
															-            new_password = new_password_start + 'y' + new_password_end
														
 
															-        elif new_password[18] == '.':
														
 
															-            new_password = new_password_start + 'z' + new_password_end
														
 
															-        else:
														
 
															-            new_password = new_password_start + 'y' + new_password_end
														
 
															-        return new_password
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_video_url(cls, video_info):
														
 
															-        video_url_dict = {}
														
 
															-        # video_url
														
 
															-        if 'videoResource' not in video_info:
														
 
															-            video_url_dict["video_url"] = ''
														
 
															-            video_url_dict["audio_url"] = ''
														
 
															-            video_url_dict["video_width"] = 0
														
 
															-            video_url_dict["video_height"] = 0
														
 
															-
														
 
															-        elif 'dash_120fps' in video_info['videoResource']:
														
 
															-            if "video_list" in video_info['videoResource']['dash_120fps'] and 'video_4' in \
														
 
															-                    video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                video_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vwidth']
														
 
															-                video_height = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_3' in \
														
 
															-                    video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                video_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vwidth']
														
 
															-                video_height = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_2' in \
														
 
															-                    video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                video_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vwidth']
														
 
															-                video_height = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_1' in \
														
 
															-                    video_info['videoResource']['dash_120fps']['video_list']:
														
 
															-                video_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vwidth']
														
 
															-                video_height = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-            elif 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
														
 
															-                    and 'dynamic_video_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
														
 
															-                    and 'dynamic_audio_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
														
 
															-                    and len(
														
 
															-                video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                    and len(
														
 
															-                video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                video_url = \
														
 
															-                    video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                        'backup_url_1']
														
 
															-                audio_url = \
														
 
															-                    video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                        'backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = \
														
 
															-                    video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                        'vwidth']
														
 
															-                video_height = \
														
 
															-                    video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                        'vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            else:
														
 
															-                video_url_dict["video_url"] = ''
														
 
															-                video_url_dict["audio_url"] = ''
														
 
															-                video_url_dict["video_width"] = 0
														
 
															-                video_url_dict["video_height"] = 0
														
 
															-
														
 
															-        elif 'dash' in video_info['videoResource']:
														
 
															-            if "video_list" in video_info['videoResource']['dash'] and 'video_4' in \
														
 
															-                    video_info['videoResource']['dash']['video_list']:
														
 
															-                video_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['dash']['video_list']['video_4']['vwidth']
														
 
															-                video_height = video_info['videoResource']['dash']['video_list']['video_4']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            elif "video_list" in video_info['videoResource']['dash'] and 'video_3' in \
														
 
															-                    video_info['videoResource']['dash']['video_list']:
														
 
															-                video_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['dash']['video_list']['video_3']['vwidth']
														
 
															-                video_height = video_info['videoResource']['dash']['video_list']['video_3']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            elif "video_list" in video_info['videoResource']['dash'] and 'video_2' in \
														
 
															-                    video_info['videoResource']['dash']['video_list']:
														
 
															-                video_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['dash']['video_list']['video_2']['vwidth']
														
 
															-                video_height = video_info['videoResource']['dash']['video_list']['video_2']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            elif "video_list" in video_info['videoResource']['dash'] and 'video_1' in \
														
 
															-                    video_info['videoResource']['dash']['video_list']:
														
 
															-                video_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['dash']['video_list']['video_1']['vwidth']
														
 
															-                video_height = video_info['videoResource']['dash']['video_list']['video_1']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-            elif 'dynamic_video' in video_info['videoResource']['dash'] \
														
 
															-                    and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video'] \
														
 
															-                    and 'dynamic_audio_list' in video_info['videoResource']['dash']['dynamic_video'] \
														
 
															-                    and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                    and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                    'backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                    'backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                    'vwidth']
														
 
															-                video_height = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                    'vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            else:
														
 
															-                video_url_dict["video_url"] = ''
														
 
															-                video_url_dict["audio_url"] = ''
														
 
															-                video_url_dict["video_width"] = 0
														
 
															-                video_url_dict["video_height"] = 0
														
 
															-
														
 
															-        elif 'normal' in video_info['videoResource']:
														
 
															-            if "video_list" in video_info['videoResource']['normal'] and 'video_4' in \
														
 
															-                    video_info['videoResource']['normal']['video_list']:
														
 
															-                video_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['normal']['video_list']['video_4']['vwidth']
														
 
															-                video_height = video_info['videoResource']['normal']['video_list']['video_4']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            elif "video_list" in video_info['videoResource']['normal'] and 'video_3' in \
														
 
															-                    video_info['videoResource']['normal']['video_list']:
														
 
															-                video_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['normal']['video_list']['video_3']['vwidth']
														
 
															-                video_height = video_info['videoResource']['normal']['video_list']['video_3']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            elif "video_list" in video_info['videoResource']['normal'] and 'video_2' in \
														
 
															-                    video_info['videoResource']['normal']['video_list']:
														
 
															-                video_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['normal']['video_list']['video_2']['vwidth']
														
 
															-                video_height = video_info['videoResource']['normal']['video_list']['video_2']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            elif "video_list" in video_info['videoResource']['normal'] and 'video_1' in \
														
 
															-                    video_info['videoResource']['normal']['video_list']:
														
 
															-                video_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['normal']['video_list']['video_1']['vwidth']
														
 
															-                video_height = video_info['videoResource']['normal']['video_list']['video_1']['vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-
														
 
															-            elif 'dynamic_video' in video_info['videoResource']['normal'] \
														
 
															-                    and 'dynamic_video_list' in video_info['videoResource']['normal']['dynamic_video'] \
														
 
															-                    and 'dynamic_audio_list' in video_info['videoResource']['normal']['dynamic_video'] \
														
 
															-                    and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list']) != 0 \
														
 
															-                    and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list']) != 0:
														
 
															-
														
 
															-                video_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                    'backup_url_1']
														
 
															-                audio_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list'][-1][
														
 
															-                    'backup_url_1']
														
 
															-                if len(video_url) % 3 == 1:
														
 
															-                    video_url += '=='
														
 
															-                elif len(video_url) % 3 == 2:
														
 
															-                    video_url += '='
														
 
															-                elif len(audio_url) % 3 == 1:
														
 
															-                    audio_url += '=='
														
 
															-                elif len(audio_url) % 3 == 2:
														
 
															-                    audio_url += '='
														
 
															-                video_url = base64.b64decode(video_url).decode('utf8')
														
 
															-                audio_url = base64.b64decode(audio_url).decode('utf8')
														
 
															-                video_width = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                    'vwidth']
														
 
															-                video_height = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
														
 
															-                    'vheight']
														
 
															-                video_url_dict["video_url"] = video_url
														
 
															-                video_url_dict["audio_url"] = audio_url
														
 
															-                video_url_dict["video_width"] = video_width
														
 
															-                video_url_dict["video_height"] = video_height
														
 
															-            else:
														
 
															-                video_url_dict["video_url"] = ''
														
 
															-                video_url_dict["audio_url"] = ''
														
 
															-                video_url_dict["video_width"] = 0
														
 
															-                video_url_dict["video_height"] = 0
														
 
															-
														
 
															-        else:
														
 
															-            video_url_dict["video_url"] = ''
														
 
															-            video_url_dict["audio_url"] = ''
														
 
															-            video_url_dict["video_width"] = 0
														
 
															-            video_url_dict["video_height"] = 0
														
 
															-
														
 
															-        return video_url_dict
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_comment_cnt(cls, item_id):
														
 
															-        url = "https://www.ixigua.com/tlb/comment/article/v5/tab_comments/?"
														
 
															-        params = {
														
 
															-            "tab_index": "0",
														
 
															-            "count": "10",
														
 
															-            "offset": "10",
														
 
															-            "group_id": str(item_id),
														
 
															-            "item_id": str(item_id),
														
 
															-            "aid": "1768",
														
 
															-            "msToken": "50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==",
														
 
															-            "X-Bogus": "DFSzswVOyGtANVeWtCLMqR/F6q9U",
														
 
															-            "_signature": cls.random_signature(),
														
 
															-        }
														
 
															-        headers = {
														
 
															-            'authority': 'www.ixigua.com',
														
 
															-            'accept': 'application/json, text/plain, */*',
														
 
															-            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
														
 
															-            'cache-control': 'no-cache',
														
 
															-            'cookie': 'MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; passport_csrf_token=72b2574f3c99f8ba670e42df430218fd; passport_csrf_token_default=72b2574f3c99f8ba670e42df430218fd; sid_guard=c7472b508ea631823ba765a60cf8757f%7C1680867422%7C3024002%7CFri%2C+12-May-2023+11%3A37%3A04+GMT; uid_tt=c13f47d51767f616befe32fb3e9f485a; uid_tt_ss=c13f47d51767f616befe32fb3e9f485a; sid_tt=c7472b508ea631823ba765a60cf8757f; sessionid=c7472b508ea631823ba765a60cf8757f; sessionid_ss=c7472b508ea631823ba765a60cf8757f; sid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; ssid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; odin_tt=b893608d4dde2e1e8df8cd5d97a0e2fbeafc4ca762ac72ebef6e6c97e2ed19859bb01d46b4190ddd6dd17d7f9678e1de; SEARCH_CARD_MODE=7168304743566296612_0; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; tt_scid=7Pux7s634-z8DYvCM20y7KigwH5u7Rh6D9C-RROpnT.aGMEcz6Vsxp.oai47wJqa4f86; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1683858689%7Ca5223fe1500578e01e138a0d71d6444692018296c4c24f5885af174a65873c95; ixigua-a-s=3; msToken=50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==; __ac_nonce=0645dcbf0005064517440; __ac_signature=_02B4Z6wo00f01FEGmAwAAIDBKchzCGqn-MBRJpyAAHAjieFC5GEg6gGiwz.I4PRrJl7f0GcixFrExKmgt6QI1i1S-dQyofPEj2ugWTCnmKUdJQv-wYuDofeKNe8VtMtZq2aKewyUGeKU-5Ud21; ixigua-a-s=3',
														
 
															-            'pragma': 'no-cache',
														
 
															-            'referer': f'https://www.ixigua.com/{item_id}?logTag=3c5aa86a8600b9ab8540',
														
 
															-            'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
														
 
															-            'sec-ch-ua-mobile': '?0',
														
 
															-            'sec-ch-ua-platform': '"macOS"',
														
 
															-            'sec-fetch-dest': 'empty',
														
 
															-            'sec-fetch-mode': 'cors',
														
 
															-            'sec-fetch-site': 'same-origin',
														
 
															-            'tt-anti-token': 'cBITBHvmYjEygzv-f9c78c1297722cf1f559c74b084e4525ce4900bdcf9e8588f20cc7c2e3234422',
														
 
															-            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35',
														
 
															-            'x-secsdk-csrf-token': '000100000001f8e733cf37f0cd255a51aea9a81ff7bc0c09490cfe41ad827c3c5c18ec809279175e4d9f5553d8a5'
														
 
															-        }
														
 
															-        urllib3.disable_warnings()
														
 
															-        s = requests.session()
														
 
															-        # max_retries=3 重试3次
														
 
															-        s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-        s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-        response = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
														
 
															-        response.close()
														
 
															-        if response.status_code != 200 or 'total_number' not in response.json() or response.json() == {}:
														
 
															-            return 0
														
 
															-        return response.json().get("total_number", 0)
														
 
															-
														
 
															-    # 获取视频详情
														
 
															-    @classmethod
														
 
															-    def get_video_info(cls, log_type, crawler, item_id):
														
 
															-        url = 'https://www.ixigua.com/api/mixVideo/information?'
														
 
															-        headers = {
														
 
															-            "accept-encoding": "gzip, deflate",
														
 
															-            "accept-language": "zh-CN,zh-Hans;q=0.9",
														
 
															-            "user-agent": get_random_user_agent('pc'),
														
 
															-            "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
														
 
															-        }
														
 
															-        params = {
														
 
															-            'mixId': str(item_id),
														
 
															-            'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
														
 
															-                       'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
														
 
															-            'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
														
 
															-            '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
														
 
															-                          'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
														
 
															-        }
														
 
															-        cookies = {
														
 
															-            'ixigua-a-s': '1',
														
 
															-            'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
														
 
															-                       'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
														
 
															-            'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
														
 
															-                     '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
														
 
															-            'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
														
 
															-            'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
														
 
															-            '__ac_nonce': '06304878000964fdad287',
														
 
															-            '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
														
 
															-                              'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
														
 
															-            'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
														
 
															-            '_tea_utm_cache_1300': 'undefined',
														
 
															-            'support_avif': 'false',
														
 
															-            'support_webp': 'false',
														
 
															-            'xiguavideopcwebid': '7134967546256016900',
														
 
															-            'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
														
 
															-        }
														
 
															-        urllib3.disable_warnings()
														
 
															-        s = requests.session()
														
 
															-        # max_retries=3 重试3次
														
 
															-        s.mount('http://', HTTPAdapter(max_retries=3))
														
 
															-        s.mount('https://', HTTPAdapter(max_retries=3))
														
 
															-        response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
														
 
															-        response.close()
														
 
															-        if response.status_code != 200 or 'data' not in response.json() or response.json()['data'] == {}:
														
 
															-            Common.logger(log_type, crawler).warning(f"get_video_info:{response.status_code}, {response.text}\n")
														
 
															-            return None
														
 
															-        else:
														
 
															-            video_info = response.json()['data'].get("gidInformation", {}).get("packerData", {}).get("video", {})
														
 
															-            if video_info == {}:
														
 
															-                return None
														
 
															-            video_dict = {
														
 
															-                "video_title": video_info.get("title", ""),
														
 
															-                "video_id": video_info.get("videoResource", {}).get("vid", ""),
														
 
															-                "gid": str(item_id),
														
 
															-                "play_cnt": int(video_info.get("video_watch_count", 0)),
														
 
															-                "like_cnt": int(video_info.get("video_like_count", 0)),
														
 
															-                "comment_cnt": int(cls.get_comment_cnt(item_id)),
														
 
															-                "share_cnt": 0,
														
 
															-                "favorite_cnt": 0,
														
 
															-                "duration": int(video_info.get("video_duration", 0)),
														
 
															-                "video_width": int(cls.get_video_url(video_info)["video_width"]),
														
 
															-                "video_height": int(cls.get_video_url(video_info)["video_height"]),
														
 
															-                "publish_time_stamp": int(video_info.get("video_publish_time", 0)),
														
 
															-                "publish_time_str": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_info.get("video_publish_time", 0)))),
														
 
															-                "user_name": video_info.get("user_info", {}).get("name", ""),
														
 
															-                "user_id": str(video_info.get("user_info", {}).get("user_id", "")),
														
 
															-                "avatar_url": str(video_info.get("user_info", {}).get("avatar_url", "")),
														
 
															-                "cover_url": video_info.get("poster_url", ""),
														
 
															-                "audio_url": cls.get_video_url(video_info)["audio_url"],
														
 
															-                "video_url": cls.get_video_url(video_info)["video_url"],
														
 
															-                "session": f"xigua-search-{int(time.time())}"
														
 
															-            }
														
 
															-            return video_dict
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_videoList(cls, log_type, crawler, search_word, our_uid, env):
														
 
															-        # 打印请求配置
														
 
															-        ca = DesiredCapabilities.CHROME
														
 
															-        ca["goog:loggingPrefs"] = {"performance": "ALL"}
														
 
															-        # # 不打开浏览器运行
														
 
															-        chrome_options = webdriver.ChromeOptions()
														
 
															-        chrome_options.add_argument(f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
														
 
															-        chrome_options.add_argument("--headless")
														
 
															-        chrome_options.add_argument("--window-size=1920,1080")
														
 
															-        chrome_options.add_argument("--no-sandbox")
														
 
															-        if env == "dev":
														
 
															-            chromedriver = "/Users/wangkun/Downloads/chromedriver/chromedriver_v112/chromedriver"
														
 
															-        else:
														
 
															-            chromedriver = "/usr/bin/chromedriver"
														
 
															-        # driver初始化
														
 
															-        driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(chromedriver))
														
 
															-        driver.implicitly_wait(10)
														
 
															-        Common.logger(log_type, crawler).info(f"打开搜索页:{search_word}")
														
 
															-        driver.get(f"https://www.ixigua.com/search/{search_word}/")
														
 
															-        time.sleep(3)
														
 
															-        # if len(driver.find_elements(By.XPATH, '//*[@class="xg-notification-close"]')) != 0:
														
 
															-        #     driver.find_element(By.XPATH, '//*[@class="xg-notification-close"]').click()
														
 
															-        # Common.logger(log_type, crawler).info("点击筛选")
														
 
															-        # driver.find_element(By.XPATH, '//*[@class="searchPageV2__header-icons-categories"]').click()
														
 
															-        # time.sleep(1)
														
 
															-        # Common.logger(log_type, crawler).info("点击最新排序")
														
 
															-        # driver.find_element(By.XPATH, '//*[@class="searchPageV2-category__wrapper"]/*[2]/*[1]').click()
														
 
															-        # time.sleep(5)
														
 
															-
														
 
															-        index = 0
														
 
															-        while True:
														
 
															-            video_elements = driver.find_elements(By.XPATH, '//*[@class="HorizontalFeedCard searchPageV2__card"]')
														
 
															-            video_element_temp = video_elements[index:]
														
 
															-            if len(video_element_temp) == 0:
														
 
															-                Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
														
 
															-                cls.i = 0
														
 
															-                cls.videos_cnt = 0
														
 
															-                driver.quit()
														
 
															-                return
														
 
															-            for i, video_element in enumerate(video_element_temp):
														
 
															-                try:
														
 
															-                    if cls.videos_cnt >= cls.videos_cnt_rule(log_type, crawler):
														
 
															-                        Common.logger(log_type, crawler).info(f"搜索词: {search_word}，已下载视频数: {cls.videos_cnt}\n")
														
 
															-                        cls.i = 0
														
 
															-                        cls.videos_cnt = 0
														
 
															-                        driver.quit()
														
 
															-                        return
														
 
															-                    # Common.logger(log_type, crawler).info(f"i:{i}, video_element:{video_element}")
														
 
															-                    if video_element is None:
														
 
															-                        Common.logger(log_type, crawler).info('到底啦~\n')
														
 
															-                        cls.i = 0
														
 
															-                        cls.videos_cnt = 0
														
 
															-                        driver.quit()
														
 
															-                        return
														
 
															-                    cls.i += 1
														
 
															-                    Common.logger(log_type, crawler).info(f'拖动"视频"列表第{cls.i}个至屏幕中间')
														
 
															-                    # Common.logger(log_type, crawler).info(f"video_elements:{len(video_elements)}")
														
 
															-                    # Common.logger(log_type, crawler).info(f"index+i:{index+i}")
														
 
															-                    driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})", video_element)
														
 
															-                    time.sleep(1)
														
 
															-                    item_id = video_element.find_elements(By.XPATH, '//*[@class="HorizontalFeedCard__coverWrapper disableZoomAnimation"]')[index+i].get_attribute('href')
														
 
															-                    item_id = item_id.split("com/")[-1].split("?&")[0]
														
 
															-                    video_dict = cls.get_video_info(log_type, crawler, item_id)
														
 
															-                    if video_dict is None:
														
 
															-                        Common.logger(log_type, crawler).info("无效视频")
														
 
															-                    else:
														
 
															-                        for k, v in video_dict.items():
														
 
															-                            Common.logger(log_type, crawler).info(f"{k}:{v}")
														
 
															-                        rule_dict = cls.get_rule_dict(log_type, crawler)
														
 
															-                        # if int((int(time.time()) - int(video_dict["publish_time_stamp"])) / (3600 * 24)) > int(rule_dict["publish_time"]):
														
 
															-                        #     Common.logger(log_type, crawler).info(f'发布时间超过{int(rule_dict["publish_time"])}天\n')
														
 
															-                        #     driver.quit()
														
 
															-                        #     return
														
 
															-                        if cls.download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
														
 
															-                            Common.logger(log_type, crawler).info("不满足抓取规则\n")
														
 
															-                        elif any(str(word) if str(word) in video_dict["video_title"] else False for word in cls.filter_words(log_type, crawler, env)) is True:
														
 
															-                            Common.logger(log_type, crawler).info("已中过滤词\n")
														
 
															-                        elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
														
 
															-                            Common.logger(log_type, crawler).info("视频已下载\n")
														
 
															-                        else:
														
 
															-                            cls.download_publish(log_type=log_type,
														
 
															-                                                 crawler=crawler,
														
 
															-                                                 search_word=search_word,
														
 
															-                                                 video_dict=video_dict,
														
 
															-                                                 rule_dict=rule_dict,
														
 
															-                                                 our_uid=our_uid,
														
 
															-                                                 env=env)
														
 
															-                except Exception as e:
														
 
															-                    Common.logger(log_type, crawler).warning(f"抓取单条视频异常:{e}\n")
														
 
															-
														
 
															-            Common.logger(log_type, crawler).info('已抓取完一组视频，休眠10秒\n')
														
 
															-            time.sleep(10)
														
 
															-            index = index + len(video_element_temp)
														
 
															-
														
 
															-    @classmethod
														
 
															-    def repeat_video(cls, log_type, crawler, video_id, env):
														
 
															-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
														
 
															-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, action="")
														
 
															-        return len(repeat_video)
														
 
															-
														
 
															-    # 下载 / 上传
														
 
															-    @classmethod
														
 
															-    def download_publish(cls, log_type, crawler, search_word, video_dict, rule_dict, our_uid, env):
														
 
															-
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
														
 
															-                               title=video_dict['video_title'], url=video_dict['video_url'])
														
 
															-        # 下载音频
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
														
 
															-                               title=video_dict['video_title'], url=video_dict['audio_url'])
														
 
															-        # 合成音视频
														
 
															-        Common.video_compose(log_type=log_type, crawler=crawler,
														
 
															-                             video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
														
 
															-        md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
														
 
															-        try:
														
 
															-            if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
														
 
															-                # 删除视频文件夹
														
 
															-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-                Common.logger(log_type, crawler).info("视频size=0，删除成功\n")
														
 
															-                return
														
 
															-        except FileNotFoundError:
														
 
															-            # 删除视频文件夹
														
 
															-            shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-            Common.logger(log_type, crawler).info("视频文件不存在，删除文件夹成功\n")
														
 
															-            return
														
 
															-        # 下载封面
														
 
															-        Common.download_method(log_type=log_type, crawler=crawler, text='cover',
														
 
															-                               title=video_dict['video_title'], url=video_dict['cover_url'])
														
 
															-        # 保存视频信息至txt
														
 
															-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
														
 
															-
														
 
															-        # 上传视频
														
 
															-        Common.logger(log_type, crawler).info("开始上传视频...")
														
 
															-        if env == "dev":
														
 
															-            oss_endpoint = "out"
														
 
															-        else:
														
 
															-            oss_endpoint = "inner"
														
 
															-        our_video_id = Publish.upload_and_publish(log_type=log_type,
														
 
															-                                                  crawler=crawler,
														
 
															-                                                  strategy="搜索爬虫策略",
														
 
															-                                                  our_uid=our_uid,
														
 
															-                                                  env=env,
														
 
															-                                                  oss_endpoint=oss_endpoint)
														
 
															-        if env == 'dev':
														
 
															-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        else:
														
 
															-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
														
 
															-        Common.logger(log_type, crawler).info("视频上传完成")
														
 
															-
														
 
															-        if our_video_id is None:
														
 
															-            try:
														
 
															-                # 删除视频文件夹
														
 
															-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
														
 
															-                return
														
 
															-            except FileNotFoundError:
														
 
															-                return
														
 
															-
														
 
															-        # 视频信息保存数据库
														
 
															-        insert_sql = f""" insert into crawler_video(video_id,
														
 
															-                                user_id,
														
 
															-                                out_user_id,
														
 
															-                                platform,
														
 
															-                                strategy,
														
 
															-                                out_video_id,
														
 
															-                                video_title,
														
 
															-                                cover_url,
														
 
															-                                video_url,
														
 
															-                                duration,
														
 
															-                                publish_time,
														
 
															-                                play_cnt,
														
 
															-                                crawler_rule,
														
 
															-                                width,
														
 
															-                                height)
														
 
															-                                values({our_video_id},
														
 
															-                                {our_uid},
														
 
															-                                "{video_dict['user_id']}",
														
 
															-                                "{cls.platform}",
														
 
															-                                "搜索爬虫策略",
														
 
															-                                "{video_dict['video_id']}",
														
 
															-                                "{video_dict['video_title']}",
														
 
															-                                "{video_dict['cover_url']}",
														
 
															-                                "{video_dict['video_url']}",
														
 
															-                                {int(video_dict['duration'])},
														
 
															-                                "{video_dict['publish_time_str']}",
														
 
															-                                {int(video_dict['play_cnt'])},
														
 
															-                                '{json.dumps(rule_dict)}',
														
 
															-                                {int(video_dict['video_width'])},
														
 
															-                                {int(video_dict['video_height'])}) """
														
 
															-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
														
 
															-        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
														
 
															-        Common.logger(log_type, crawler).info("视频信息写入数据库完成")
														
 
															-
														
 
															-        # 视频信息写入飞书
														
 
															-        Feishu.insert_columns(log_type, crawler, "BUNvGC", "ROWS", 1, 2)
														
 
															-        values = [[
														
 
															-            search_word,
														
 
															-            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
														
 
															-            "关键词搜索",
														
 
															-            video_dict['video_title'],
														
 
															-            str(video_dict['video_id']),
														
 
															-            our_video_link,
														
 
															-            video_dict['gid'],
														
 
															-            video_dict['play_cnt'],
														
 
															-            video_dict['comment_cnt'],
														
 
															-            video_dict['like_cnt'],
														
 
															-            video_dict['share_cnt'],
														
 
															-            video_dict['duration'],
														
 
															-            str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
														
 
															-            video_dict['publish_time_str'],
														
 
															-            video_dict['user_name'],
														
 
															-            video_dict['user_id'],
														
 
															-            video_dict['avatar_url'],
														
 
															-            video_dict['cover_url'],
														
 
															-            video_dict['video_url'],
														
 
															-            video_dict['audio_url']]]
														
 
															-        time.sleep(0.5)
														
 
															-        Feishu.update_values(log_type, crawler, "BUNvGC", "E2:Z2", values)
														
 
															-        Common.logger(log_type, crawler).info('视频信息写入飞书完成\n')
														
 
															-        cls.videos_cnt += 1
														
 
															-
														
 
															-    @classmethod
														
 
															-    def get_search_videos(cls, log_type, crawler, env):
														
 
															-        user_list = cls.get_user_list(log_type=log_type, crawler=crawler, sheetid="SSPNPW", env=env)
														
 
															-        for user in user_list:
														
 
															-            try:
														
 
															-                cls.i = 0
														
 
															-                cls.videos_cnt = 0
														
 
															-                search_word = user["search_word"]
														
 
															-                our_uid = user["our_uid"]
														
 
															-                Common.logger(log_type, crawler).info(f"开始抓取 {search_word} 视频\n")
														
 
															-                cls.get_videoList(log_type=log_type,
														
 
															-                                  crawler=crawler,
														
 
															-                                  search_word=search_word,
														
 
															-                                  our_uid=our_uid,
														
 
															-                                  env=env)
														
 
															-            except Exception as e:
														
 
															-                Common.logger(log_type, crawler).error(f"get_search_videos:{e}\n")
														
 
															-
														
 
															-
														
 
															-if __name__ == '__main__':
														
 
															-    # XiguaSearch.get_search_videos('search', 'xigua', 'dev')
														
 
															-    # XiguaSearch.get_videoList("search", "xigua", "长寿食物", "dev")
														
 
															-    # XiguaSearch.get_video_info("search", "xigua", "7027495456829768196")
														
 
															-    # print(XiguaSearch.get_comment_cnt("7027495456829768196"))
														
 
															-    # print(XiguaSearch.videos_cnt_rule("search", "xigua"))
														
 
															-    # XiguaSearch.filter_words('search', 'xigua', 'dev')
														
 
															-    # print(XiguaSearchNew.get_rule_dict('search', 'xigua'))
														
 
															-    # os.system("ps aux | grep Chrome | grep -v grep | awk '{print $2}' | xargs kill -9")
														
 
															-    pass
														
 
															-