wangkun committed 1 year ago
parent
commit
766295af54
41 changed files with 4 additions and 9019 deletions
  1. +0 -139   README.MD
  2. +0 -28    benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py
  3. +0 -48    benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend_scheduling.py
  4. +0 -272   benshanzhufu/benshanzhufu_recommend/benshanzhufu_recommend.py
  5. +2 -4     common/public.py
  6. +0 -3     kuaishou/kuaishou_follow/__init__.py
  7. +0 -659   kuaishou/kuaishou_follow/kuaishou_follow.py
  8. +1 -1     kuaishou/kuaishou_main/run_ks_recommend.py
  9. +1 -1     kuaishou/kuaishou_main/run_ks_recommend_dev.py
  10. +0 -501  kuaishou/kuaishou_recommend/recommend_kuaishou.py
  11. +0 -213  main/process.sh
  12. +0 -1    main/process_mq.sh
  13. +0 -24   suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py
  14. +0 -49   suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend_scheduling.py
  15. +0 -214  suisuiniannianyingfuqi/suisuiniannianyingfuqi_recommend/suisuiniannianyingfuqi_recommend.py
  16. +0 -152  weixinzhishu/weixinzhishu_main/weixinzhishu_inner_long.py
  17. +0 -152  weixinzhishu/weixinzhishu_main/weixinzhishu_inner_sort.py
  18. +0 -153  weixinzhishu/weixinzhishu_main/weixinzhishu_out.py
  19. +0 -3    xiaoniangao/xiaoniangao_follow/__init__.py
  20. +0 -69   xiaoniangao/xiaoniangao_follow/xiaoniangao_follow.py
  21. +0 -685  xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py
  22. +0 -43   xiaoniangao/xiaoniangao_main/run_xiaoniangao_author_scheduling.py
  23. +0 -35   xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py
  24. +0 -55   xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py
  25. +0 -74   xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour_scheduling.py
  26. +0 -36   xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py
  27. +0 -48   xiaoniangao/xiaoniangao_main/run_xiaoniangao_play_scheduling.py
  28. +0 -430  xiaoniangao/xiaoniangao_play/xiaoniangao_play.py
  29. +0 -3    xigua/xigua_follow/__init__.py
  30. +0 -1039 xigua/xigua_follow/xigua_follow.py
  31. +0 -895  xigua/xigua_follow/xigua_follow_scheduling.py
  32. +0 -43   xigua/xigua_main/run_xigua_author_scheduling.py
  33. +0 -41   xigua/xigua_main/run_xigua_follow.py
  34. +0 -30   xigua/xigua_main/run_xigua_recommend.py
  35. +0 -48   xigua/xigua_main/run_xigua_recommend_scheduling.py
  36. +0 -42   xigua/xigua_main/run_xigua_search.py
  37. +0 -28   xigua/xigua_main/run_xigua_search_new.py
  38. +0 -45   xigua/xigua_main/run_xigua_search_scheduling.py
  39. +0 -850  xigua/xigua_recommend/xigua_recommend.py
  40. +0 -959  xigua/xigua_search/xigua_search.py
  41. +0 -904  xigua/xigua_search/xigua_search_new.py

+ 0 - 139
README.MD

@@ -62,85 +62,6 @@ ps aux | grep run_youtube
 ps aux | grep run_youtube | grep -v grep | awk '{print $2}' | xargs kill -9
 ```
 
-
-#### Xigua Video
-```commandline
-Alibaba Cloud server 102
-Xigua follow: sh ./main/main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="follow" --crawler="xigua" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" xigua/nohup.log
-Xigua recommend: /usr/bin/sh ./main/scheduling_main.sh ./xigua/xigua_main/run_xigua_recommend.py --log_type="recommend" --crawler="xigua" --env="prod" xigua/logs/nohup-recommend.log
-Local machine
-Xigua follow: sh ./main/main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="follow" --crawler="xigua" --strategy="定向爬虫策略" --oss_endpoint="out" --env="prod" --machine="local" xigua/nohup.log
-Xigua recommend: sh ./main/scheduling_main.sh ./xigua/xigua_main/run_xigua_recommend.py --log_type="recommend" --crawler="xigua" --env="dev" xigua/logs/nohup-recommend.log
-Xigua search: sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_search_new.py --log_type="search" --crawler="xigua" --env="dev" xigua/logs/search-shell.log
-Kill-process commands:
-ps aux | grep run_xigua
-ps aux | grep run_xigua | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep run_xigua_follow | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep run_xigua_recommend | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep run_xigua_search | grep -v grep | awk '{print $2}' | xargs kill -9
-```
-
-#### Kuaishou
-```commandline
-Alibaba Cloud server 102
-sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_recommend.py --log_type="recommend" --crawler="kuaishou" --strategy="推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/recommend.log
-sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/follow.log
-# sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --env="prod" --machine="aliyun" kuaishou/nohup.log
-Local machine
-sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --oss_endpoint="out" --env="dev" --machine="local" kuaishou/nohup.log
-# sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --env="dev" --machine="local" kuaishou/nohup.log
-macpro
-sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --oss_endpoint="out" --env="prod" --machine="macpro" kuaishou/nohup.log
-# sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --env="prod" --machine="macpro" kuaishou/nohup.log
-Kill-process commands:
-ps aux | grep run_kuaishou
-ps aux | grep run_kuaishou | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep Appium.app | grep -v grep | awk '{print $2}' | xargs kill -9
-```
-
-#### Xiaoniangao
-```commandline
-Alibaba Cloud server 102
-Directional crawler strategy: /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="follow" --crawler="xiaoniangao" --env="prod"  xiaoniangao/nohup-follow.log
-Hourly-chart crawler strategy: /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="prod" xiaoniangao/nohup-hour.log
-Play-count-chart crawler strategy: /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="prod" xiaoniangao/nohup-play.log
-
-Local debugging
-Directional crawler strategy: sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="follow" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-follow.log
-Hourly-chart crawler strategy: sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-hour.log
-Play-count-chart crawler strategy: sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-play.log
-
-nohup python3 -u xiaoniangao/xiaoniangao_follow/insert_video_1.py >> xiaoniangao/nohup-1.log 2>&1 &
-nohup python3 -u xiaoniangao/xiaoniangao_follow/insert_video_2.py >> xiaoniangao/nohup-1.log 2>&1 &
-nohup python3 -u xiaoniangao/xiaoniangao_follow/insert_video_3.py >> xiaoniangao/nohup-1.log 2>&1 &
-
-Kill-process commands
-ps aux | grep run_xiaoniangao_follow
-ps aux | grep run_xiaoniangao_hour
-ps aux | grep run_xiaoniangao_play
-ps aux | grep run_xiaoniangao | grep -v grep | awk '{print $2}' | xargs kill -9 
-ps aux | grep run_xiaoniangao_follow | grep -v grep | awk '{print $2}' | xargs kill -9 
-ps aux | grep run_xiaoniangao_hour | grep -v grep | awk '{print $2}' | xargs kill -9 
-ps aux | grep run_xiaoniangao_play | grep -v grep | awk '{print $2}' | xargs kill -9 
-```
-
-#### Gongzhonghao (WeChat official accounts)
-```commandline
-Alibaba Cloud server 102
-Directional crawler strategy:
-/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow.log
-/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-2.log
-/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py --log_type="follow-3" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-3.log
-Local debugging
-Directional crawler strategy:
-sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow.log
-sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow-2.log
-sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py --log_type="follow-3" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow-3.log
-Kill-process commands
-ps aux | grep run_gongzhonghao
-ps aux | grep run_gongzhonghao | grep -v grep | awk '{print $2}' | xargs kill -9 
-```
-
 #### WeChat Index
 ```commandline
 Fetch off-site titles: crontab scheduled script, runs once a day at 12:00:00
@@ -165,43 +86,6 @@ ps aux | grep 微信 | grep -v grep | awk '{print $2}' | xargs kill -9
 
 ```
 
-#### Douyin
-```commandline
-Alibaba Cloud server 102
-sh ./main/main.sh ./douyin/douyin_main/run_douyin_recommend.py --log_type="recommend" --crawler="douyin" --strategy="推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" douyin/recommend.log
-# sh ./main/main.sh ./kuaishou/douyin_main/run_douyin_recommend.py --log_type="recommend" --crawler="douyin" --strategy="定向爬虫策略" --env="prod" --machine="aliyun" kuaishou/nohup.log
-Local machine
-```
-
-#### Crawler process monitoring
-```commandline
-Alibaba Cloud server 102: /usr/bin/sh /data5/piaoquan_crawler/main/process.sh "prod"
-Hong Kong server: /usr/bin/sh /root/piaoquan_crawler/main/process.sh "hk"
-Local debugging: sh /Users/wangkun/Desktop/crawler/piaoquan_crawler/main/process.sh "dev"
-```
-
-
-#### Benshanzhufu mini-program
-```commandline
-Alibaba Cloud server 102
-/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py --log_type="recommend" --crawler="benshanzhufu" --env="prod"  ./benshanzhufu/logs/nohup-recommend.log
-Local debugging
-sh ./main/scheduling_main.sh ./benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py --log_type="recommend" --crawler="benshanzhufu" --env="dev"  ./benshanzhufu/logs/nohup-recommend.log
-Check processes
-ps aux | grep run_benshanzhufu
-ps aux | grep run_benshanzhufu | grep -v grep | awk '{print $2}' | xargs kill -9
-```
-
-#### Suisuiniannianyingfuqi mini-program
-```commandline
-Alibaba Cloud server 102
-/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py --log_type="recommend" --crawler="suisuiniannianyingfuqi" --env="prod"  ./suisuiniannianyingfuqi/logs/nohup-recommend.log
-Local debugging
-sh ./main/scheduling_main.sh ./suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py --log_type="recommend" --crawler="suisuiniannianyingfuqi" --env="dev"  ./suisuiniannianyingfuqi/logs/nohup-recommend.log
-Check processes
-ps aux | grep run_suisuiniannianyingfuqi
-ps aux | grep run_suisuiniannianyingfuqi | grep -v grep | awk '{print $2}' | xargs kill -9
-```
-
 #### Offline crawlers: 刚刚都传 / 吉祥幸福 / 知青天天看 / 众妙音信 / wechat_search_key
 ```commandline
 MacAir device, crontab scheduled tasks
@@ -228,29 +112,6 @@ ps aux | grep shipinhao_search
 ps aux | grep shipinhao_search | grep -v grep | awk '{print $2}' | xargs kill -9
 ```
 
-#### Crawler process monitoring: main/process.sh
-```commandline
-Server 102:
-* * * * * /usr/bin/sh /data5/piaoquan_crawler/main/process.sh "prod"  >>/data5/piaoquan_crawler/main/main_logs/run-process.log 2>&1
-Local debugging:
-sh main/process.sh "dev" >> main/main_logs/run-process.log 2>&1
-Process monitoring
-ps aux | grep search_key_mac | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep gongzhonghao | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep xiaoniangao | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep run_xigua_search | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep run_suisuiniannianyingfuqi | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep run_benshanzhufu | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep run_kuaishou | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep run_gongzhonghao | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep run_shipinhao | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep Appium.app | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep kuaishou | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep xigua_search | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep kanyikan | grep -v grep | awk '{print $2}' | xargs kill -9
-ps aux | grep shipinhao_search | grep -v grep | awk '{print $2}' | xargs kill -9
-```
-
 
 #### MQ-based crawler process daemon: main/process_mq.sh
 ```commandline

+ 0 - 28
benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py

@@ -1,28 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/4/13
-import argparse
-import os
-import sys
-sys.path.append(os.getcwd())
-from common.common import Common
-from benshanzhufu.benshanzhufu_recommend.benshanzhufu_recommend import BenshanzhufuRecommend
-
-def main(log_type, crawler, env):
-    if env == "dev":
-        oss_endpoint = "out"
-    else:
-        oss_endpoint = "inner"
-    Common.logger(log_type, crawler).info('开始抓取 本山祝福小程序\n')
-    BenshanzhufuRecommend.get_videoList(log_type, crawler, oss_endpoint, env)
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('抓取完一轮,休眠 1 分钟\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the argument parser
-    parser.add_argument('--log_type', type=str)  # add an argument and declare its type
-    parser.add_argument('--crawler')  # add an argument
-    parser.add_argument('--env')  # add an argument
-    args = parser.parse_args()  # parse the arguments; values can also be supplied from the terminal
-    main(log_type=args.log_type, crawler=args.crawler, env=args.env)

+ 0 - 48
benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend_scheduling.py

@@ -1,48 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/4/13
-import argparse
-import os
-import random
-import sys
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.public import task_fun
-from common.scheduling_db import MysqlHelper
-from benshanzhufu.benshanzhufu_recommend.benshanzhufu_recommend_scheduling import BenshanzhufuRecommend
-
-
-def main(log_type, crawler, task, env):
-    task_dict = task_fun(task)['task_dict']
-    rule_dict = task_fun(task)['rule_dict']
-    task_id = task_dict['task_id']
-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
-    our_uid_list = []
-    for user in user_list:
-        our_uid_list.append(user["uid"])
-    our_uid = random.choice(our_uid_list)
-    Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
-    Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
-    Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-    Common.logger(log_type, crawler).info('开始抓取 本山祝福小程序\n')
-    BenshanzhufuRecommend.get_videoList(log_type=log_type,
-                                        crawler=crawler,
-                                        our_uid=our_uid,
-                                        rule_dict=rule_dict,
-                                        env=env)
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('抓取完一轮\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the argument parser
-    parser.add_argument('--log_type', type=str)  # add an argument and declare its type
-    parser.add_argument('--crawler')  # add an argument
-    parser.add_argument('--task')  # add an argument
-    parser.add_argument('--env')  # add an argument
-    args = parser.parse_args()  # parse the arguments; values can also be supplied from the terminal
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         task=args.task,
-         env=args.env)

+ 0 - 272
benshanzhufu/benshanzhufu_recommend/benshanzhufu_recommend.py

@@ -1,272 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/4/13
-import json
-import os
-import random
-import shutil
-import sys
-import time
-from hashlib import md5
-from urllib import parse
-import requests
-import urllib3
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.scheduling_db import MysqlHelper
-from common.feishu import Feishu
-from common.publish import Publish
-proxies = {"http": None, "https": None}
-
-
-class BenshanzhufuRecommend:
-    # pagination parameters
-    visitor_key = ""
-    page = 1
-    platform = "本山祝福"
-
-    # filter-word lexicon
-    @classmethod
-    def benshanzhufu_config(cls, log_type, crawler, text, env):
-        select_sql = f"""select * from crawler_config where source="benshanzhufu" """
-        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
-        title_list = []
-        filter_list = []
-        for content in contents:
-            config = content['config']
-            config_dict = eval(config)
-            for k, v in config_dict.items():
-                if k == "title":
-                    title_list_config = v.split(",")
-                    for title in title_list_config:
-                        title_list.append(title)
-                if k == "filter":
-                    filter_list_config = v.split(",")
-                    for filter_word in filter_list_config:
-                        filter_list.append(filter_word)
-        if text == "title":
-            return title_list
-        elif text == "filter":
-            return filter_list
-
-    @classmethod
-    def repeat_video(cls, log_type, crawler, video_id, env):
-        sql = f""" select * from crawler_video where platform="本山祝福" and out_video_id="{video_id}"; """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
-        return len(repeat_video)
-
-    # fetch videos from the recommend feed
-    @classmethod
-    def get_videoList(cls, log_type, crawler, oss_endpoint, env):
-        while True:
-            now = int(time.time() * 1000)
-            url = "https://bszf.wentingyou.cn/index.php/v111/index/index?parameter="
-            header = {
-                "content-time": str(now),
-                # "visitorKey": "165086930003741",
-                "chatKey": "wx0fb8149da961d3b0",
-                "cache-time": str(now),
-                "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
-                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
-                              "MicroMessenger/8.0.20(0x1800142d) NetType/WIFI Language/zh_CN",
-                "Referer": "https://servicewechat.com/wx0fb8149da961d3b0/2/page-frame.html"
-            }
-            parameter = {
-                "page": random.randint(1, 76),
-                "ini_id": cls.visitor_key
-            }
-            params = parse.quote(json.dumps(parameter))
-            url = url + str(params)
-            # try:
-            urllib3.disable_warnings()
-            r = requests.get(headers=header, url=url, proxies=proxies, verify=False)
-            if r.status_code != 200:
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.status_code}, {r.text}\n")
-                cls.visitor_key = ""
-                cls.page = 1
-                return
-            elif r.json()['message'] != "list success":
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.status_code}, {r.json()}\n")
-                cls.visitor_key = ""
-                cls.page = 1
-                return
-            elif "data" not in r.json():
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.status_code}, {r.json()}\n")
-                cls.visitor_key = ""
-                cls.page = 1
-                return
-            elif len(r.json()['data']["list"]) == 0:
-                Common.logger(log_type, crawler).info(f"没有更多数据了~ {r.json()}\n")
-                cls.visitor_key = ""
-                cls.page = 1
-                return
-            else:
-                # advance pagination
-                cls.visitor_key = r.json()["data"]["visitor_key"]
-                cls.page += 1
-                feeds = r.json()["data"]["list"]
-                for i in range(len(feeds)):
-                    video_title = feeds[i].get("title", "").strip().replace("\n", "")\
-                            .replace("/", "").replace("本山祝福", "").replace(" ", "")\
-                            .replace(" ", "").replace("&NBSP", "").replace("\r", "")\
-                            .replace("#", "").replace(".", "。").replace("\\", "")\
-                            .replace(":", "").replace("*", "").replace("?", "")\
-                            .replace("?", "").replace('"', "").replace("<", "")\
-                            .replace(">", "").replace("|", "").replace("'", "").replace('"', "")
-                    video_id = str(feeds[i].get("nid", ""))
-                    play_cnt = 0
-                    comment_cnt = feeds[i].get("commentCount", 0)
-                    share_cnt = 0
-                    like_cnt = 0
-                    publish_time_stamp = feeds[i].get("update_time", 0)
-                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-                    user_name = "本山祝福"
-                    user_id = "benshanzhufu"
-                    cover_url = feeds[i].get("video_cover", "")
-                    video_url = feeds[i].get("video_url", "")
-                    if ".mp4" not in video_url:
-                        video_url = ""
-
-                    video_dict = {
-                        'video_title': video_title,
-                        'video_id': video_id,
-                        'play_cnt': play_cnt,
-                        'comment_cnt': comment_cnt,
-                        'like_cnt': like_cnt,
-                        'share_cnt': share_cnt,
-                        'publish_time_stamp': publish_time_stamp,
-                        'publish_time_str': publish_time_str,
-                        'user_name': user_name,
-                        'user_id': user_id,
-                        'avatar_url': cover_url,
-                        'cover_url': cover_url,
-                        'video_url': video_url,
-                        'session': f"benshanzhufu-{int(time.time())}"
-                    }
-                    for k, v in video_dict.items():
-                        Common.logger(log_type, crawler).info(f"{k}:{v}")
-
-                    # filter out invalid videos
-                    if video_id == "" or cover_url == "" or video_url == "":
-                        Common.logger(log_type, crawler).info("无效视频\n")
-                    elif any(str(word) if str(word) in video_title else False for word in cls.benshanzhufu_config(log_type, crawler, "filter", env)) is True:
-                        Common.logger(log_type, crawler).info('已中过滤词\n')
-                    elif cls.repeat_video(log_type, crawler, video_id, env) != 0:
-                        Common.logger(log_type, crawler).info('视频已下载\n')
-                    else:
-                        cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
-            # except Exception as e:
-            #     Common.logger(log_type, crawler).error(f"get_videoList异常:{e}\n")
-
-    # download / upload
-    @classmethod
-    def download_publish(cls, log_type, crawler, video_dict, oss_endpoint, env):
-        # try:
-        # download the video
-        Common.download_method(log_type=log_type, crawler=crawler, text='video', title=video_dict['video_title'], url=video_dict['video_url'])
-        ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
-        if ffmpeg_dict is None:
-            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-            shutil.rmtree(f"./{crawler}/videos/{md_title}/")
-            Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-            return
-        video_dict["duration"] = ffmpeg_dict["duration"]
-        video_dict["video_width"] = ffmpeg_dict["width"]
-        video_dict["video_height"] = ffmpeg_dict["height"]
-
-        # download the cover image
-        Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'], url=video_dict['cover_url'])
-        # save video info to a txt file
-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-        # upload the video
-        Common.logger(log_type, crawler).info("开始上传视频...")
-        our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                  crawler=crawler,
-                                                  strategy="推荐榜爬虫策略",
-                                                  our_uid="recommend",
-                                                  env=env,
-                                                  oss_endpoint=oss_endpoint)
-        if env == 'dev':
-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        else:
-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        Common.logger(log_type, crawler).info("视频上传完成")
-
-        if our_video_id is None:
-            # delete the video folder
-            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-            return
-
-        # write the video record to Feishu
-        Feishu.insert_columns(log_type, crawler, "440018", "ROWS", 1, 2)
-        upload_time = int(time.time())
-        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                   "推荐榜爬虫策略",
-                   video_dict['video_id'],
-                   video_dict['video_title'],
-                   our_video_link,
-                   video_dict['play_cnt'],
-                   video_dict['comment_cnt'],
-                   video_dict['like_cnt'],
-                   video_dict['share_cnt'],
-                   video_dict['duration'],
-                   f"{video_dict['video_width']}*{video_dict['video_height']}",
-                   video_dict['publish_time_str'],
-                   video_dict['user_name'],
-                   video_dict['user_id'],
-                   video_dict['avatar_url'],
-                   video_dict['cover_url'],
-                   video_dict['video_url']]]
-        time.sleep(0.5)
-        Feishu.update_values(log_type, crawler, "440018", "E2:Z2", values)
-        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
-
-        rule_dict = {}
-        # save video info to the database
-        insert_sql = f""" insert into crawler_video(video_id,
-                                                out_user_id,
-                                                platform,
-                                                strategy,
-                                                out_video_id,
-                                                video_title,
-                                                cover_url,
-                                                video_url,
-                                                duration,
-                                                publish_time,
-                                                play_cnt,
-                                                crawler_rule,
-                                                width,
-                                                height)
-                                                values({our_video_id},
-                                                "{video_dict['user_id']}",
-                                                "{cls.platform}",
-                                                "推荐榜爬虫策略",
-                                                "{video_dict['video_id']}",
-                                                "{video_dict['video_title']}",
-                                                "{video_dict['cover_url']}",
-                                                "{video_dict['video_url']}",
-                                                {int(video_dict['duration'])},
-                                                "{video_dict['publish_time_str']}",
-                                                {int(video_dict['play_cnt'])},
-                                                '{json.dumps(rule_dict)}',
-                                                {int(video_dict['video_width'])},
-                                                {int(video_dict['video_height'])}) """
-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
-        Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
-
-        # except Exception as e:
-        #     Common.logger(log_type, crawler).error(f"download_publish异常:{e}\n")
-        #     # delete the video folder
-        #     shutil.rmtree(f"./{crawler}/videos/")
-        #     return
-
-
-if __name__ == "__main__":
-    BenshanzhufuRecommend.get_videoList("recommend", "benshanzhufu", "out", "dev")
-
-    pass

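Aside: the pagination pattern in the deleted BenshanzhufuRecommend.get_videoList is worth noting. The crawler serialized a small parameter dict to JSON, percent-encoded it, and appended it to the `parameter=` query string, carrying the server-issued visitor_key forward on each page. A minimal sketch of just that encoding step, using the endpoint and field names from the deleted code above (no request is sent; the empty ini_id mirrors the first-page state):

```python
# -*- coding: utf-8 -*-
# Sketch of the request-parameter encoding used by the deleted
# BenshanzhufuRecommend.get_videoList (endpoint and fields from the code above).
import json
import random
from urllib import parse

base_url = "https://bszf.wentingyou.cn/index.php/v111/index/index?parameter="
parameter = {
    "page": random.randint(1, 76),  # the deleted code sampled a random page each round
    "ini_id": "",                   # visitor_key from the previous response; empty on page one
}
# json.dumps -> percent-encode -> append to the query string, as the original did
url = base_url + parse.quote(json.dumps(parameter))
print(url)
```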
+ 2 - 4
common/public.py

@@ -232,11 +232,9 @@ def get_title_score(log_type, crawler, stop_sheet, score_sheet, title):
                     stop_word_list.append(y)
         break
 
-    # segment the text into words
-    cut_list = jieba.lcut(title)
-
-    # build the word list
+    # tokenize the text
     cut_word_list = []
+    cut_list = jieba.lcut(title)
     for cut_item in cut_list:
         if cut_item == " ":
             continue

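The reordered hunk above preserves the original behavior: jieba.lcut segments the title and bare-space tokens are dropped before scoring. A standalone sketch of just that segmentation step (assumes the jieba package is installed; get_title_score's scoring logic is omitted):

```python
# -*- coding: utf-8 -*-
# Sketch of the tokenization step in common/public.py's get_title_score.
import jieba


def cut_title(title: str) -> list:
    """Segment a title into words, skipping bare-space tokens as the original loop does."""
    cut_word_list = []
    for cut_item in jieba.lcut(title):
        if cut_item == " ":
            continue
        cut_word_list.append(cut_item)
    return cut_word_list


if __name__ == "__main__":
    print(cut_title("本山祝福 新年快乐"))
```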
+ 0 - 3
kuaishou/kuaishou_follow/__init__.py

@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/23

+ 0 - 659
kuaishou/kuaishou_follow/kuaishou_follow.py

@@ -1,659 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/24
-import os
-# import random
-import shutil
-# import string
-import sys
-import time
-from hashlib import md5
-
-import requests
-import json
-
-import urllib3
-from requests.adapters import HTTPAdapter
-
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.feishu import Feishu
-from common.getuser import getUser
-from common.db import MysqlHelper
-from common.publish import Publish
-from common.public import random_title, get_config_from_mysql
-from common.public import get_user_from_mysql
-
-
-class KuaiShouFollow:
-    platform = "快手"
-    tag = "快手爬虫,定向爬虫策略"
-
-    @classmethod
-    def get_rule(cls, log_type, crawler, index):
-        try:
-            rule_sheet = Feishu.get_values_batch(log_type, crawler, "3iqG4z")
-            if index == 1:
-                rule_dict = {
-                    "play_cnt": f"{rule_sheet[1][1]}{rule_sheet[1][2]}",
-                    "video_width": f"{rule_sheet[2][1]}{rule_sheet[2][2]}",
-                    "video_height": f"{rule_sheet[3][1]}{rule_sheet[3][2]}",
-                    "like_cnt": f"{rule_sheet[4][1]}{rule_sheet[4][2]}",
-                    "duration": f"{rule_sheet[5][1]}{rule_sheet[5][2]}",
-                    "download_cnt": f"{rule_sheet[6][1]}{rule_sheet[6][2]}",
-                    "publish_time": f"{rule_sheet[7][1]}{rule_sheet[7][2]}",
-                }
-                # for k, v in rule_dict.items():
-                #     Common.logger(log_type, crawler).info(f"{k}:{v}")
-                return rule_dict
-            elif index == 2:
-                rule_dict = {
-                    "play_cnt": f"{rule_sheet[9][1]}{rule_sheet[9][2]}",
-                    "video_width": f"{rule_sheet[10][1]}{rule_sheet[10][2]}",
-                    "video_height": f"{rule_sheet[11][1]}{rule_sheet[11][2]}",
-                    "like_cnt": f"{rule_sheet[12][1]}{rule_sheet[12][2]}",
-                    "duration": f"{rule_sheet[13][1]}{rule_sheet[13][2]}",
-                    "download_cnt": f"{rule_sheet[14][1]}{rule_sheet[14][2]}",
-                    "publish_time": f"{rule_sheet[15][1]}{rule_sheet[15][2]}",
-                }
-                # for k, v in rule_dict.items():
-                #     Common.logger(log_type, crawler).info(f"{k}:{v}")
-                return rule_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_rule:{e}\n")
-
-    @classmethod
-    def download_rule(cls, video_dict, rule_dict):
-        if eval(f"{video_dict['play_cnt']}{rule_dict['play_cnt']}") is True \
-                and eval(f"{video_dict['video_width']}{rule_dict['video_width']}") is True \
-                and eval(f"{video_dict['video_height']}{rule_dict['video_height']}") is True \
-                and eval(f"{video_dict['like_cnt']}{rule_dict['like_cnt']}") is True \
-                and eval(f"{video_dict['duration']}{rule_dict['duration']}") is True \
-                and eval(f"{video_dict['publish_time']}{rule_dict['publish_time']}") is True:
-            return True
-        else:
-            return False
-
-    # filter-word lexicon
-    @classmethod
-    def filter_words(cls, log_type, crawler):
-        try:
-            while True:
-                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'HIKVvs')
-                if filter_words_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
-                    continue
-                filter_words_list = []
-                for x in filter_words_sheet:
-                    for y in x:
-                        if y is None:
-                            pass
-                        else:
-                            filter_words_list.append(y)
-                return filter_words_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
-
-    # fetch off-platform user info
-    @classmethod
-    def get_out_user_info(cls, log_type, crawler, out_uid):
-        try:
-            url = "https://www.kuaishou.com/graphql"
-
-            payload = json.dumps({
-                "operationName": "visionProfile",
-                "variables": {
-                    "userId": out_uid
-                },
-                "query": "query visionProfile($userId: String) {\n  visionProfile(userId: $userId) {\n    result\n    hostName\n    userProfile {\n      ownerCount {\n        fan\n        photo\n        follow\n        photo_public\n        __typename\n      }\n      profile {\n        gender\n        user_name\n        user_id\n        headurl\n        user_text\n        user_profile_bg_url\n        __typename\n      }\n      isFollowing\n      __typename\n    }\n    __typename\n  }\n}\n"
-            })
-            # s = string.ascii_lowercase
-            # r = random.choice(s)
-            headers = {
-                'Accept': '*/*',
-                'Content-Type': 'application/json',
-                'Origin': 'https://www.kuaishou.com',
-                'Cookie': f'kpf=PC_WEB; clientid=3; did={cls.get_did(log_type, crawler)}; kpn=KUAISHOU_VISION',
-                'Content-Length': '552',
-                'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
-                'Host': 'www.kuaishou.com',
-                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
-                'Referer': 'https://www.kuaishou.com/profile/{}'.format(out_uid),
-                'Accept-Encoding': 'gzip, deflate, br',
-                'Connection': 'keep-alive'
-            }
-            urllib3.disable_warnings()
-            s = requests.session()
-            # max_retries=3: retry up to 3 times
-            s.mount('http://', HTTPAdapter(max_retries=3))
-            s.mount('https://', HTTPAdapter(max_retries=3))
-            response = s.post(url=url, headers=headers, data=payload, proxies=Common.tunnel_proxies(), verify=False,
-                              timeout=5)
-            response.close()
-            # Common.logger(log_type, crawler).info(f"get_out_user_info_response:{response.text}")
-            if response.status_code != 200:
-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.text}\n")
-                return
-            elif 'data' not in response.json():
-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.json()}\n")
-                return
-            elif 'visionProfile' not in response.json()['data']:
-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.json()['data']}\n")
-                return
-            elif 'userProfile' not in response.json()['data']['visionProfile']:
-                Common.logger(log_type, crawler).warning(
-                    f"get_out_user_info_response:{response.json()['data']['visionProfile']['userProfile']}\n")
-                return
-            else:
-                userProfile = response.json()['data']['visionProfile']['userProfile']
-                # Common.logger(log_type, crawler).info(f"userProfile:{userProfile}")
-
-                try:
-                    out_fans_str = str(userProfile['ownerCount']['fan'])
-                except Exception:
-                    out_fans_str = "0"
-
-                try:
-                    out_follow_str = str(userProfile['ownerCount']['follow'])
-                except Exception:
-                    out_follow_str = "0"
-
-                try:
-                    out_avatar_url = userProfile['profile']['headurl']
-                except Exception:
-                    out_avatar_url = ""
-
-                Common.logger(log_type, crawler).info(f"out_fans_str:{out_fans_str}")
-                Common.logger(log_type, crawler).info(f"out_follow_str:{out_follow_str}")
-                Common.logger(log_type, crawler).info(f"out_avatar_url:{out_avatar_url}")
-
-                if "万" in out_fans_str:
-                    out_fans = int(float(out_fans_str.split("万")[0]) * 10000)
-                else:
-                    out_fans = int(out_fans_str.replace(",", ""))
-                if "万" in out_follow_str:
-                    out_follow = int(float(out_follow_str.split("万")[0]) * 10000)
-                else:
-                    out_follow = int(out_follow_str.replace(",", ""))
-
-                out_user_dict = {
-                    "out_fans": out_fans,
-                    "out_follow": out_follow,
-                    "out_avatar_url": out_avatar_url
-                }
-                Common.logger(log_type, crawler).info(f"out_user_dict:{out_user_dict}")
-                return out_user_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_out_user_info:{e}\n")
-
-    # fetch the user info list
-    @classmethod
-    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
-        try:
-            while True:
-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
-                if user_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
-                    continue
-                our_user_list = []
-                for i in range(1, len(user_sheet)):
-                    # for i in range(1, 2):
-                    out_uid = user_sheet[i][2]
-                    user_name = user_sheet[i][3]
-                    our_uid = user_sheet[i][6]
-                    our_user_link = user_sheet[i][7]
-                    if out_uid is None or user_name is None:
-                        Common.logger(log_type, crawler).info("空行\n")
-                    else:
-                        Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
-                        if our_uid is None:
-                            out_user_info = cls.get_out_user_info(log_type, crawler, out_uid)
-                            out_user_dict = {
-                                "out_uid": out_uid,
-                                "user_name": user_name,
-                                "out_avatar_url": out_user_info["out_avatar_url"],
-                                "out_create_time": '',
-                                "out_tag": '',
-                                "out_play_cnt": 0,
-                                "out_fans": out_user_info["out_fans"],
-                                "out_follow": out_user_info["out_follow"],
-                                "out_friend": 0,
-                                "out_like": 0,
-                                "platform": cls.platform,
-                                "tag": cls.tag,
-                            }
-                            our_user_dict = getUser.create_user(log_type=log_type, crawler=crawler,
-                                                                out_user_dict=out_user_dict, env=env, machine=machine)
-                            our_uid = our_user_dict['our_uid']
-                            our_user_link = our_user_dict['our_user_link']
-                            Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
-                                                 [[our_uid, our_user_link]])
-                            Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
-                            our_user_list.append(our_user_dict)
-                        else:
-                            our_user_dict = {
-                                'out_uid': out_uid,
-                                'user_name': user_name,
-                                'our_uid': our_uid,
-                                'our_user_link': our_user_link,
-                            }
-                            our_user_list.append(our_user_dict)
-                return our_user_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_user_list:{e}\n')
-
-    # normalize the video title
-    @classmethod
-    def video_title(cls, log_type, crawler, env, title):
-        title_split1 = title.split(" #")
-        if title_split1[0] != "":
-            title1 = title_split1[0]
-        else:
-            title1 = title_split1[-1]
-
-        title_split2 = title1.split(" #")
-        if title_split2[0] != "":
-            title2 = title_split2[0]
-        else:
-            title2 = title_split2[-1]
-
-        title_split3 = title2.split("@")
-        if title_split3[0] != "":
-            title3 = title_split3[0]
-        else:
-            title3 = title_split3[-1]
-
-        video_title = title3.strip().replace("\n", "") \
-                          .replace("/", "").replace("快手", "").replace(" ", "") \
-                          .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
-                          .replace("#", "").replace(".", "。").replace("\\", "") \
-                          .replace(":", "").replace("*", "").replace("?", "") \
-                          .replace("?", "").replace('"', "").replace("<", "") \
-                          .replace(">", "").replace("|", "").replace("@", "").replace('"', '').replace("'", '')[:40]
-        if video_title.replace(" ", "") == "" or video_title == "。。。" or video_title == "...":
-            return random_title(log_type, crawler, env, text='title')
-        else:
-            return video_title
-
-    @classmethod
-    def get_did(cls, log_type, crawler):
-        while True:
-            did_sheet = Feishu.get_values_batch(log_type, crawler, "G7acT6")
-            if did_sheet is None:
-                Common.logger(log_type, crawler).warning(f"did_sheet:{did_sheet}")
-                time.sleep(2)
-                continue
-            return did_sheet[0][1]
-
-    @classmethod
-    def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine, pcursor=""):
-        download_cnt_1, download_cnt_2 = 0, 0
-        rule_dict_1 = cls.get_rule(log_type, crawler, 1)
-        rule_dict_2 = cls.get_rule(log_type, crawler, 2)
-        if rule_dict_1 is None or rule_dict_2 is None:
-            Common.logger(log_type, crawler).warning(f"rule_dict is None")
-            return
-
-        url = "https://www.kuaishou.com/graphql"
-        payload = json.dumps({
-            "operationName": "visionProfilePhotoList",
-            "variables": {
-                "userId": out_uid,
-                "pcursor": "",
-                "page": "profile"
-            },
-            "query": "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
-        })
-        headers = {
-            'Accept': '*/*',
-            'Content-Type': 'application/json',
-            'Origin': 'https://www.kuaishou.com',
-            'Cookie': f'kpf=PC_WEB; clientid=3; did={cls.get_did(log_type, crawler)}; kpn=KUAISHOU_VISION',
-            'Content-Length': '1260',
-            'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
-            'Host': 'www.kuaishou.com',
-            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
-            'Referer': 'https://www.kuaishou.com/profile/{}'.format(out_uid),
-            'Accept-Encoding': 'gzip, deflate, br',
-            'Connection': 'keep-alive'
-        }
-        response = requests.post(url=url, headers=headers, data=payload, proxies=Common.tunnel_proxies(),
-                                 verify=False, timeout=10)
-        try:
-            feeds = response.json()['data']['visionProfilePhotoList']['feeds']
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_videoList:{e},response:{response.text}")
-            return
-        if not feeds:
-            Common.logger(log_type, crawler).info("没有更多视频啦 ~\n")
-            return
-        pcursor = response.json()['data']['visionProfilePhotoList']['pcursor']
-        # Common.logger(log_type, crawler).info(f"feeds0: {feeds}\n")
-        for i in range(len(feeds)):
-            try:
-                # video_title
-                if 'caption' not in feeds[i]['photo']:
-                    video_title = random_title(log_type, crawler, env, text='title')
-                elif feeds[i]['photo']['caption'].strip() == "":
-                    video_title = random_title(log_type, crawler, env, text='title')
-                else:
-                    video_title = cls.video_title(log_type, crawler, env, feeds[i]['photo']['caption'])
-
-                if 'videoResource' not in feeds[i]['photo'] \
-                        and 'manifest' not in feeds[i]['photo'] \
-                        and 'manifestH265' not in feeds[i]['photo']:
-                    Common.logger(log_type, crawler).warning(f"get_videoList:{feeds[i]['photo']}\n")
-                    break
-                videoResource = feeds[i]['photo']['videoResource']
-
-                if 'h264' not in videoResource and 'hevc' not in videoResource:
-                    Common.logger(log_type, crawler).warning(f"get_videoList:{videoResource}\n")
-                    break
-
-                # video_id
-                if 'h264' in videoResource and 'videoId' in videoResource['h264']:
-                    video_id = videoResource['h264']['videoId']
-                elif 'hevc' in videoResource and 'videoId' in videoResource['hevc']:
-                    video_id = videoResource['hevc']['videoId']
-                else:
-                    video_id = ""
-
-                # play_cnt
-                if 'viewCount' not in feeds[i]['photo']:
-                    play_cnt = 0
-                else:
-                    play_cnt = int(feeds[i]['photo']['viewCount'])
-
-                # like_cnt
-                if 'realLikeCount' not in feeds[i]['photo']:
-                    like_cnt = 0
-                else:
-                    like_cnt = feeds[i]['photo']['realLikeCount']
-
-                # publish_time
-                if 'timestamp' not in feeds[i]['photo']:
-                    publish_time_stamp = 0
-                    publish_time_str = ''
-                    publish_time = 0
-                else:
-                    publish_time_stamp = int(int(feeds[i]['photo']['timestamp']) / 1000)
-                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-                    publish_time = int((int(time.time()) - publish_time_stamp) / (3600 * 24))
-
-                # duration
-                if 'duration' not in feeds[i]['photo']:
-                    duration = 0
-                else:
-                    duration = int(int(feeds[i]['photo']['duration']) / 1000)
-
-                # video_width / video_height / video_url
-                mapping = {}
-                for item in ['width', 'height']:
-                    try:
-                        val = str(videoResource['h264']['adaptationSet'][0]['representation'][0][item])
-                    except Exception:
-                        val = str(videoResource['hevc']['adaptationSet'][0]['representation'][0][item])
-                    mapping[item] = val
-                video_width = int(mapping['width']) if mapping['width'] else 0
-                video_height = int(mapping['height']) if mapping['height'] else 0
-                # cover_url
-                if 'coverUrl' not in feeds[i]['photo']:
-                    cover_url = ""
-                else:
-                    cover_url = feeds[i]['photo']['coverUrl']
-
-                # user_name / avatar_url
-                user_name = feeds[i]['author']['name']
-                avatar_url = feeds[i]['author']['headerUrl']
-
-                video_url = feeds[i]['photo']['photoUrl']
-                video_dict = {'video_title': video_title,
-                              'video_id': video_id,
-                              'play_cnt': play_cnt,
-                              'comment_cnt': 0,
-                              'like_cnt': like_cnt,
-                              'share_cnt': 0,
-                              'video_width': video_width,
-                              'video_height': video_height,
-                              'duration': duration,
-                              'publish_time': publish_time,
-                              'publish_time_stamp': publish_time_stamp,
-                              'publish_time_str': publish_time_str,
-                              'user_name': user_name,
-                              'user_id': out_uid,
-                              'avatar_url': avatar_url,
-                              'cover_url': cover_url,
-                              'video_url': video_url,
-                              'session': f"kuaishou{int(time.time())}"}
-
-                rule_1 = cls.download_rule(video_dict, rule_dict_1)
-                Common.logger(log_type, crawler).info(f"video_title:{video_title}")
-                Common.logger(log_type, crawler).info(f"video_id:{video_id}\n")
-
-                Common.logger(log_type, crawler).info(
-                    f"play_cnt:{video_dict['play_cnt']}{rule_dict_1['play_cnt']}, {eval(str(video_dict['play_cnt']) + str(rule_dict_1['play_cnt']))}")
-                Common.logger(log_type, crawler).info(
-                    f"like_cnt:{video_dict['like_cnt']}{rule_dict_1['like_cnt']}, {eval(str(video_dict['like_cnt']) + str(rule_dict_1['like_cnt']))}")
-                Common.logger(log_type, crawler).info(
-                    f"video_width:{video_dict['video_width']}{rule_dict_1['video_width']}, {eval(str(video_dict['video_width']) + str(rule_dict_1['video_width']))}")
-                Common.logger(log_type, crawler).info(
-                    f"video_height:{video_dict['video_height']}{rule_dict_1['video_height']}, {eval(str(video_dict['video_height']) + str(rule_dict_1['video_height']))}")
-                Common.logger(log_type, crawler).info(
-                    f"duration:{video_dict['duration']}{rule_dict_1['duration']}, {eval(str(video_dict['duration']) + str(rule_dict_1['duration']))}")
-                Common.logger(log_type, crawler).info(
-                    f"publish_time:{video_dict['publish_time']}{rule_dict_1['publish_time']}, {eval(str(video_dict['publish_time']) + str(rule_dict_1['publish_time']))}")
-                Common.logger(log_type, crawler).info(f"rule_1:{rule_1}\n")
-
-                rule_2 = cls.download_rule(video_dict, rule_dict_2)
-                Common.logger(log_type, crawler).info(
-                    f"play_cnt:{video_dict['play_cnt']}{rule_dict_2['play_cnt']}, {eval(str(video_dict['play_cnt']) + str(rule_dict_2['play_cnt']))}")
-                Common.logger(log_type, crawler).info(
-                    f"like_cnt:{video_dict['like_cnt']}{rule_dict_2['like_cnt']}, {eval(str(video_dict['like_cnt']) + str(rule_dict_2['like_cnt']))}")
-                Common.logger(log_type, crawler).info(
-                    f"video_width:{video_dict['video_width']}{rule_dict_2['video_width']}, {eval(str(video_dict['video_width']) + str(rule_dict_2['video_width']))}")
-                Common.logger(log_type, crawler).info(
-                    f"video_height:{video_dict['video_height']}{rule_dict_2['video_height']}, {eval(str(video_dict['video_height']) + str(rule_dict_2['video_height']))}")
-                Common.logger(log_type, crawler).info(
-                    f"duration:{video_dict['duration']}{rule_dict_2['duration']}, {eval(str(video_dict['duration']) + str(rule_dict_2['duration']))}")
-                Common.logger(log_type, crawler).info(
-                    f"publish_time:{video_dict['publish_time']}{rule_dict_2['publish_time']}, {eval(str(video_dict['publish_time']) + str(rule_dict_2['publish_time']))}")
-                Common.logger(log_type, crawler).info(f"rule_2:{rule_2}\n")
-
-                if video_title == "" or video_url == "":
-                    Common.logger(log_type, crawler).info("无效视频\n")
-                    continue
-                elif rule_1 is True:
-                    if download_cnt_1 < int(
-                            rule_dict_1['download_cnt'].replace("=", "")[-1].replace("<", "")[-1].replace(">",
-                                                                                                          "")[
-                                -1]):
-                        cls.download_publish(log_type=log_type,
-                                             crawler=crawler,
-                                             strategy=strategy,
-                                             video_dict=video_dict,
-                                             rule_dict=rule_dict_1,
-                                             our_uid=our_uid,
-                                             oss_endpoint=oss_endpoint,
-                                             env=env,
-                                             machine=machine)
-                        # if download_finished is True:
-                        #     download_cnt_1 += 1
-                elif rule_2 is True:
-                    if download_cnt_2 < int(rule_dict_2['download_cnt'].replace("=", "").replace("<", "").replace(">", "")):
-                        cls.download_publish(log_type=log_type,
-                                             crawler=crawler,
-                                             strategy=strategy,
-                                             video_dict=video_dict,
-                                             rule_dict=rule_dict_2,
-                                             our_uid=our_uid,
-                                             oss_endpoint=oss_endpoint,
-                                             env=env,
-                                             machine=machine)
-                        # if download_finished is True:
-                        #     download_cnt_2 += 1
-                else:
-                    Common.logger(log_type, crawler).info("不满足下载规则\n")
-                    # Common.logger(log_type, crawler).info(f"feeds: {feeds}\n")
-            except Exception as e:
-                Common.logger(log_type, crawler).warning(f"抓取单条视频异常:{e}\n")
-
-            # if pcursor == "no_more":
-            #     Common.logger(log_type, crawler).info(f"作者,{out_uid},已经到底了,没有更多内容了\n")
-            #     return
-            # cls.get_videoList(log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine,
-            #               pcursor=pcursor)
-            # time.sleep(random.randint(1, 3))
-
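The `download_cnt` limits above arrive from the rule sheet as comparison strings such as `">=10"`, and the chained `replace(...)[-1]` indexing that originally parsed them only survives single-digit limits. A minimal sketch of a sturdier parse (the `parse_limit` helper is illustrative, not part of this repo):

```python
import re

def parse_limit(rule_value: str) -> int:
    """Extract the integer from a limit string such as '>=10' or '<5'."""
    match = re.search(r"\d+", rule_value)
    if match is None:
        raise ValueError(f"no integer in rule value: {rule_value!r}")
    return int(match.group())

assert parse_limit(">=10") == 10
assert parse_limit("<5") == 5
```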
-    @classmethod
-    def repeat_video(cls, log_type, crawler, video_id, video_title, publish_time, env, machine):
-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}" or (platform="{cls.platform}" and video_title="{video_title}" and publish_time="{publish_time}") """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
-        return len(repeat_video)
-
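`repeat_video` interpolates the crawled title directly into the SQL text, so a stray quote character in a title breaks the statement. A hedged sketch of the same lookup with driver-side placeholders, assuming a DB-API cursor (the internals of `MysqlHelper` are not shown in this diff):

```python
def count_existing(cursor, platform, video_id, title, publish_time):
    # %s placeholders let the MySQL driver escape quotes inside crawled titles
    sql = ("select 1 from crawler_video "
           "where (platform = %s and out_video_id = %s) "
           "or (platform = %s and video_title = %s and publish_time = %s)")
    cursor.execute(sql, (platform, video_id, platform, title, publish_time))
    return len(cursor.fetchall())
```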
-    @classmethod
-    def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
-        filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
-        for filter_word in filter_words:
-            if filter_word in video_dict['video_title']:
-                Common.logger(log_type, crawler).info(f"标题已中过滤词:{video_dict['video_title']}\n")
-                return
-        download_finished = False
-        if cls.repeat_video(log_type, crawler, video_dict['video_id'], video_dict['video_title'],
-                            video_dict['publish_time_str'], env, machine) != 0:
-            Common.logger(log_type, crawler).info('视频已下载\n')
-        else:
-            # 下载视频
-            Common.download_method(log_type=log_type, crawler=crawler, text='video',
-                                   title=video_dict['video_title'], url=video_dict['video_url'])
-            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-            try:
-                if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                    Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-                    return
-            except FileNotFoundError:
-                # 删除视频文件夹
-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                Common.logger(log_type, crawler).info("未发现视频文件,删除成功\n")
-                return
-
-            # 下载封面
-            Common.download_method(log_type=log_type, crawler=crawler, text='cover',
-                                   title=video_dict['video_title'], url=video_dict['cover_url'])
-            # 保存视频信息至txt
-            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-            # 上传视频
-            Common.logger(log_type, crawler).info("开始上传视频...")
-            our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                      crawler=crawler,
-                                                      strategy=strategy,
-                                                      our_uid=our_uid,
-                                                      env=env,
-                                                      oss_endpoint=oss_endpoint)
-            if env == 'dev':
-                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-            else:
-                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-            Common.logger(log_type, crawler).info("视频上传完成")
-
-            if our_video_id is None:
-                try:
-                    Common.logger(log_type, crawler).warning(f"our_video_id:{our_video_id} 删除该视频文件夹")
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                    return download_finished
-                except FileNotFoundError:
-                    return download_finished
-
-            # 视频信息保存数据库
-            insert_sql = f""" insert into crawler_video(video_id,
-                                                    user_id,
-                                                    out_user_id,
-                                                    platform,
-                                                    strategy,
-                                                    out_video_id,
-                                                    video_title,
-                                                    cover_url,
-                                                    video_url,
-                                                    duration,
-                                                    publish_time,
-                                                    play_cnt,
-                                                    crawler_rule,
-                                                    width,
-                                                    height)
-                                                    values({our_video_id},
-                                                    {our_uid},
-                                                    "{video_dict['user_id']}",
-                                                    "{cls.platform}",
-                                                    "定向爬虫策略",
-                                                    "{video_dict['video_id']}",
-                                                    "{video_dict['video_title']}",
-                                                    "{video_dict['cover_url']}",
-                                                    "{video_dict['video_url']}",
-                                                    {int(video_dict['duration'])},
-                                                    "{video_dict['publish_time_str']}",
-                                                    {int(video_dict['play_cnt'])},
-                                                    '{json.dumps(rule_dict)}',
-                                                    {int(video_dict['video_width'])},
-                                                    {int(video_dict['video_height'])}) """
-            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-            MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
-            Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
-
-            # 视频写入飞书
-            Feishu.insert_columns(log_type, 'kuaishou', "fYdA8F", "ROWS", 1, 2)
-            upload_time = int(time.time())
-            values = [[our_video_id,
-                       time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                       "定向榜",
-                       str(video_dict['video_id']),
-                       video_dict['video_title'],
-                       our_video_link,
-                       video_dict['play_cnt'],
-                       video_dict['comment_cnt'],
-                       video_dict['like_cnt'],
-                       video_dict['share_cnt'],
-                       video_dict['duration'],
-                       f"{video_dict['video_width']}*{video_dict['video_height']}",
-                       video_dict['publish_time_str'],
-                       video_dict['user_name'],
-                       video_dict['user_id'],
-                       video_dict['avatar_url'],
-                       video_dict['cover_url'],
-                       video_dict['video_url']]]
-            time.sleep(1)
-            Feishu.update_values(log_type, 'kuaishou', "fYdA8F", "E2:Z2", values)
-            Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
-            download_finished = True
-        return download_finished
-
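`repeat_video` followed by the insert above is a check-then-act sequence: two workers can both pass the check before either row lands. Assuming a unique index over `(platform, out_video_id)` exists or can be added, the write itself can be made idempotent:

```python
def insert_once(cursor, platform, out_video_id, video_title):
    # with a unique key on (platform, out_video_id), a duplicate insert
    # becomes a no-op instead of a second row
    sql = ("insert ignore into crawler_video (platform, out_video_id, video_title) "
           "values (%s, %s, %s)")
    cursor.execute(sql, (platform, out_video_id, video_title))
    return cursor.rowcount == 1  # 0 means the row already existed
```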
-    @classmethod
-    def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
-        user_list = get_user_from_mysql(log_type, crawler, crawler, env)
-        for user in user_list:
-            try:
-                spider_link = user["link"]
-                out_uid = spider_link.split('/')[-1]
-                user_name = user["nick_name"]
-                our_uid = user["uid"]
-                Common.logger(log_type, crawler).info(f"开始抓取 {user_name} 用户主页视频\n")
-                cls.get_videoList(log_type=log_type,
-                                  crawler=crawler,
-                                  strategy=strategy,
-                                  our_uid=our_uid,
-                                  out_uid=out_uid,
-                                  oss_endpoint=oss_endpoint,
-                                  env=env,
-                                  machine=machine)
-            except Exception as e:
-                Common.logger(log_type, crawler).warning(f"抓取用户{user}时异常:{e}\n")
-
-
-if __name__ == "__main__":
-    print(KuaiShouFollow.get_did("follow", "kuaishou"))
-    pass

+ 1 - 1
kuaishou/kuaishou_main/run_ks_recommend.py

@@ -10,7 +10,7 @@ sys.path.append(os.getcwd())
 from common.common import Common
 from common.public import get_consumer, ack_message, task_fun_mq
 from common.scheduling_db import MysqlHelper
-from kuaishou.kuaishou_recommend.kuaishou_recommend_shceduling import KuaiShouRecommendScheduling
+from kuaishou.kuaishou_recommend.kuaishou_recommend_cut_title import KuaiShouRecommendScheduling


 def main(log_type, crawler, topic_name, group_id, env):

+ 1 - 1
kuaishou/kuaishou_main/run_ks_recommend_dev.py

@@ -13,7 +13,7 @@ def kuaishou_recommend_main(log_type, crawler, env):
     KuaiShouRecommendScheduling.get_videoList(log_type=log_type,
                                               crawler=crawler,
                                               our_uid=6267140,
-                                              rule_dict={"play_cnt":{"min":100000,"max":0},"like_cnt":{"min":80000,"max":0},"duration":{"min":50,"max":0},"period":{"min":30,"max":30}},
+                                              rule_dict={"play_cnt":{"min":10000,"max":0},"like_cnt":{"min":8000,"max":0},"duration":{"min":50,"max":0},"period":{"min":30,"max":30}},
                                               env=env)
     Common.del_logs(log_type, crawler)
     Common.logger(log_type, crawler).info("抓取一轮结束\n")
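The dev entry point drops each threshold by an order of magnitude (100000 to 10000 plays, 80000 to 8000 likes). In these `rule_dict` values, `"max": 0` conventionally means "no upper bound"; a sketch of how such a dict can be evaluated (the `match_rule` helper is illustrative, and `period` may be handled specially by the real scheduler):

```python
def match_rule(video: dict, rule_dict: dict) -> bool:
    """True when every field's min/max range admits the video's value."""
    for field, bounds in rule_dict.items():
        value = video.get(field, 0)
        if value < bounds.get("min", 0):
            return False
        maximum = bounds.get("max", 0)
        if maximum != 0 and value > maximum:
            return False
    return True

rule = {"play_cnt": {"min": 10000, "max": 0}, "like_cnt": {"min": 8000, "max": 0}}
assert match_rule({"play_cnt": 25000, "like_cnt": 9000}, rule)
assert not match_rule({"play_cnt": 500, "like_cnt": 9000}, rule)
```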

+ 0 - 501
kuaishou/kuaishou_recommend/recommend_kuaishou.py

@@ -1,501 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/24
-import os
-import random
-import shutil
-import sys
-import time
-import string
-from hashlib import md5
-
-import requests
-import json
-
-import urllib3
-from requests.adapters import HTTPAdapter
-
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.feishu import Feishu
-from common.getuser import getUser
-from common.db import MysqlHelper
-from common.publish import Publish
-from common.public import get_user_from_mysql, random_title, get_config_from_mysql
-from common.userAgent import get_random_user_agent
-
-
-class KuaiShouRecommend:
-    platform = "快手"
-    tag = "快手爬虫,推荐爬虫策略"
-
-    @classmethod
-    def get_rule(cls, log_type, crawler):
-        try:
-            rule_sheet = Feishu.get_values_batch(log_type, crawler, "NQ6CZN")
-            rule_dict = {
-                "play_cnt": f"{rule_sheet[0][1]}{rule_sheet[0][2]}",
-                "video_width": f"{rule_sheet[1][1]}{rule_sheet[1][2]}",
-                "video_height": f"{rule_sheet[2][1]}{rule_sheet[2][2]}",
-                "like_cnt": f"{rule_sheet[5][1]}{rule_sheet[5][2]}",
-                "duration": f"{rule_sheet[3][1]}{rule_sheet[3][2]}",
-                "publish_time": f"{rule_sheet[4][1]}{rule_sheet[4][2]}",
-            }
-            return rule_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_rule:{e}\n")
-
-    @classmethod
-    def download_rule(cls, video_dict, rule_dict):
-        if eval(f"{video_dict['video_width']}{rule_dict['video_width']}") is True \
-                and eval(f"{video_dict['play_cnt']}{rule_dict['play_cnt']}") is True \
-                and eval(f"{video_dict['video_height']}{rule_dict['video_height']}") is True \
-                and eval(f"{video_dict['like_cnt']}{rule_dict['like_cnt']}") is True \
-                and eval(f"{video_dict['duration']}{rule_dict['duration']}") is True \
-                and eval(f"{video_dict['publish_time']}{rule_dict['publish_time']}") is True:
-            return True
-        else:
-            return False
-
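`download_rule` concatenates a number with a sheet-supplied string like `">=1000"` and hands the result to `eval`, which fails on empty cells and will execute whatever the sheet happens to contain. A sketch of the same comparison via an explicit operator table (names are illustrative):

```python
import operator

OPS = {">=": operator.ge, "<=": operator.le, "==": operator.eq,
       ">": operator.gt, "<": operator.lt, "=": operator.eq}

def check(value: int, rule: str) -> bool:
    """Evaluate e.g. check(500, '>=100') without eval()."""
    for symbol, fn in OPS.items():  # two-char symbols are listed first
        if rule.startswith(symbol):
            return fn(value, int(rule[len(symbol):]))
    raise ValueError(f"unrecognized rule: {rule!r}")

assert check(500, ">=100") is True
assert check(50, ">=100") is False
```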
-    # 过滤词库
-    @classmethod
-    def filter_words(cls, log_type, crawler):
-        try:
-            while True:
-                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'HIKVvs')
-                if filter_words_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
-                    time.sleep(10)
-                    continue
-                filter_words_list = []
-                for x in filter_words_sheet:
-                    for y in x:
-                        if y is None:
-                            pass
-                        else:
-                            filter_words_list.append(y)
-                return filter_words_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
-
-    # 获取用户信息列表
-    @classmethod
-    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
-        try:
-            while True:
-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
-                if user_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
-                    time.sleep(10)
-                    continue
-                our_user_list = []
-                for i in range(1, len(user_sheet)):
-                    # for i in range(1, 2):
-                    out_uid = user_sheet[i][2]
-                    user_name = user_sheet[i][3]
-                    our_uid = user_sheet[i][6]
-                    our_user_link = user_sheet[i][7]
-                    if out_uid is None or user_name is None:
-                        Common.logger(log_type, crawler).info("空行\n")
-                    else:
-                        Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
-                        if our_uid is None:
-                            out_user_info = cls.get_out_user_info(log_type, crawler, out_uid)
-                            out_user_dict = {
-                                "out_uid": out_uid,
-                                "user_name": user_name,
-                                "out_avatar_url": out_user_info["out_avatar_url"],
-                                "out_create_time": '',
-                                "out_tag": '',
-                                "out_play_cnt": 0,
-                                "out_fans": out_user_info["out_fans"],
-                                "out_follow": out_user_info["out_follow"],
-                                "out_friend": 0,
-                                "out_like": 0,
-                                "platform": cls.platform,
-                                "tag": cls.tag,
-                            }
-                            our_user_dict = getUser.create_user(log_type=log_type, crawler=crawler,
-                                                                out_user_dict=out_user_dict, env=env, machine=machine)
-                            our_uid = our_user_dict['our_uid']
-                            our_user_link = our_user_dict['our_user_link']
-                            Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
-                                                 [[our_uid, our_user_link]])
-                            Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
-                            our_user_list.append(our_user_dict)
-                        else:
-                            our_user_dict = {
-                                'out_uid': out_uid,
-                                'user_name': user_name,
-                                'our_uid': our_uid,
-                                'our_user_link': our_user_link,
-                            }
-                            our_user_list.append(our_user_dict)
-                return our_user_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_user_list:{e}\n')
-
-    # 处理视频标题
-    @classmethod
-    def video_title(cls, log_type, crawler, env, title):
-        title_split1 = title.split(" #")
-        if title_split1[0] != "":
-            title1 = title_split1[0]
-        else:
-            title1 = title_split1[-1]
-
-        title_split2 = title1.split(" #")
-        if title_split2[0] != "":
-            title2 = title_split2[0]
-        else:
-            title2 = title_split2[-1]
-
-        title_split3 = title2.split("@")
-        if title_split3[0] != "":
-            title3 = title_split3[0]
-        else:
-            title3 = title_split3[-1]
-
-        video_title = title3.strip().replace("\n", "") \
-                          .replace("/", "").replace("快手", "").replace(" ", "") \
-                          .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
-                          .replace("#", "").replace(".", "。").replace("\\", "") \
-                          .replace(":", "").replace("*", "").replace("?", "") \
-                          .replace("?", "").replace('"', "").replace("<", "") \
-                          .replace(">", "").replace("|", "").replace("@", "").replace('"', '').replace("'", '')[:40]
-        if video_title.replace(" ", "") == "" or video_title == "。。。" or video_title == "...":
-            return random_title(log_type, crawler, env, text='title')
-        else:
-            return video_title
-
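`video_title` strips each forbidden character with its own `.replace()` call. A single-pass sketch with `re` that mirrors approximately the same character set (treat the class below as an approximation of the originals):

```python
import re

FORBIDDEN = re.compile("[" + re.escape('\n\r\\/#::*?？"<>|@\'“” ') + "]|快手|&NBSP")

def clean_title(raw: str, max_len: int = 40) -> str:
    # strip forbidden characters, map '.' to the full-width form, cap length
    title = FORBIDDEN.sub("", raw).replace(".", "。")
    return title[:max_len]

assert clean_title("测试 #话题 @某人") == "测试话题某人"
```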
-    @classmethod
-    def get_videoList(cls, log_type, crawler, strategy, our_uid, oss_endpoint, env, machine):
-        rule_dict_1 = cls.get_rule(log_type, crawler)
-        if rule_dict_1 is None:
-            Common.logger(log_type, crawler).warning(f"rule_dict is None")
-            return
-
-        for i in range(100):
-            url = "https://www.kuaishou.com/graphql"
-
-            payload = json.dumps({
-                "operationName": "visionNewRecoFeed",
-                "variables": {
-                    "dailyFirstPage": False
-                },
-                "query": "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nfragment photoResult on PhotoResult {\n  result\n  llsid\n  expTag\n  serverExpTag\n  pcursor\n  feeds {\n    ...feedContent\n    __typename\n  }\n  webPageArea\n  __typename\n}\n\nquery visionNewRecoFeed($semKeyword: String, $semCrowd: String, $utmSource: String, $utmMedium: String, $utmCampaign: String, $dailyFirstPage: Boolean) {\n  visionNewRecoFeed(semKeyword: $semKeyword, semCrowd: $semCrowd, utmSource: $utmSource, utmMedium: $utmMedium, utmCampaign: $utmCampaign, dailyFirstPage: $dailyFirstPage) {\n    ...photoResult\n    __typename\n  }\n}\n"
-            })
-            s = string.ascii_lowercase
-            r = random.choice(s)
-
-            headers = {
-                'Accept-Language': 'zh-CN,zh;q=0.9',
-                'Connection': 'keep-alive',
-                'Cookie': 'kpf=PC_WEB; clientid=3; did=web_7cdc486ebd1aba220455a7781d6ae5b5{r}7; kpn=KUAISHOU_VISION;'.format(
-                    r=r),
-                'Origin': 'https://www.kuaishou.com',
-                'Referer': 'https://www.kuaishou.com/new-reco',
-                'Sec-Fetch-Dest': 'empty',
-                'Sec-Fetch-Mode': 'cors',
-                'Sec-Fetch-Site': 'same-origin',
-                'User-Agent': get_random_user_agent('pc'),
-                'accept': '*/*',
-                'content-type': 'application/json',
-                'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
-                'sec-ch-ua-mobile': '?0',
-                'sec-ch-ua-platform': '"macOS"'
-            }
-
-            try:
-                urllib3.disable_warnings()
-                s = requests.session()
-                # max_retries=3 重试3次
-                s.mount('http://', HTTPAdapter(max_retries=3))
-                s.mount('https://', HTTPAdapter(max_retries=3))
-                response = s.post(url=url, headers=headers, data=payload, proxies=Common.tunnel_proxies(), verify=False,
-                                  timeout=10)
-                response.close()
-            except Exception as e:
-                Common.logger(log_type, crawler).error(f"get_videoList:{e}\n")
-                continue
-            # Common.logger(log_type, crawler).info(f"get_videoList:{response.text}\n")
-            if response.status_code != 200:
-                Common.logger(log_type, crawler).warning(f"get_videoList_response:{response.text}\n")
-                continue
-            elif 'data' not in response.json():
-                Common.logger(log_type, crawler).warning(f"get_videoList_response:{response.json()}\n")
-                continue
-            elif 'visionNewRecoFeed' not in response.json()['data']:
-                Common.logger(log_type, crawler).warning(f"get_videoList_response:{response.json()['data']}\n")
-                continue
-            elif 'feeds' not in response.json()['data']['visionNewRecoFeed']:
-                Common.logger(log_type, crawler).warning(
-                    f"get_videoList_response:{response.json()['data']['visionNewRecoFeed']}\n")
-                continue
-            elif len(response.json()['data']['visionNewRecoFeed']['feeds']) == 0:
-                Common.logger(log_type, crawler).info("没有更多视频啦 ~\n")
-                continue
-            else:
-                feeds = response.json()['data']['visionNewRecoFeed']['feeds']
-                for i in range(len(feeds)):
-                    if 'photo' not in feeds[i]:
-                        Common.logger(log_type, crawler).warning(f"get_videoList:{feeds[i]}\n")
-                        continue
-
-                    # video_title
-                    if 'caption' not in feeds[i]['photo']:
-                        video_title = random_title(log_type, crawler, env, text='title')
-
-                    elif feeds[i]['photo']['caption'].strip() == "":
-                        video_title = random_title(log_type, crawler, env, text='title')
-                    else:
-                        video_title = cls.video_title(log_type, crawler, env, feeds[i]['photo']['caption'])
-
-                    if 'videoResource' not in feeds[i]['photo'] \
-                            and 'manifest' not in feeds[i]['photo'] \
-                            and 'manifestH265' not in feeds[i]['photo']:
-                        Common.logger(log_type, crawler).warning(f"get_videoList:{feeds[i]['photo']}\n")
-                        continue
-                    videoResource = feeds[i]['photo']['videoResource']
-
-                    if 'h264' not in videoResource and 'hevc' not in videoResource:
-                        Common.logger(log_type, crawler).warning(f"get_videoList:{videoResource}\n")
-                        continue
-
-                    # video_id
-                    if 'h264' in videoResource and 'videoId' in videoResource['h264']:
-                        video_id = videoResource['h264']['videoId']
-                    elif 'hevc' in videoResource and 'videoId' in videoResource['hevc']:
-                        video_id = videoResource['hevc']['videoId']
-                    else:
-                        video_id = ""
-
-                    # play_cnt
-                    if 'viewCount' not in feeds[i]['photo']:
-                        play_cnt = 0
-                    else:
-                        play_cnt = int(feeds[i]['photo']['viewCount'])
-
-                    # like_cnt
-                    if 'realLikeCount' not in feeds[i]['photo']:
-                        like_cnt = 0
-                    else:
-                        like_cnt = feeds[i]['photo']['realLikeCount']
-
-                    # publish_time
-                    if 'timestamp' not in feeds[i]['photo']:
-                        publish_time_stamp = 0
-                        publish_time_str = ''
-                        publish_time = 0
-                    else:
-                        publish_time_stamp = int(int(feeds[i]['photo']['timestamp']) / 1000)
-                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-                        publish_time = int((int(time.time()) - publish_time_stamp) / (3600 * 24))
-
-                    # duration
-                    if 'duration' not in feeds[i]['photo']:
-                        duration = 0
-                    else:
-                        duration = int(int(feeds[i]['photo']['duration']) / 1000)
-
-                    # video_width / video_height / video_url
-                    mapping = {}
-                    for item in ['width', 'height']:
-                        # fall back from the h264 rendition to hevc; blank if both are missing
-                        try:
-                            val = str(videoResource['h264']['adaptationSet'][0]['representation'][0][item])
-                        except (KeyError, IndexError, TypeError):
-                            try:
-                                val = str(videoResource['hevc']['adaptationSet'][0]['representation'][0][item])
-                            except (KeyError, IndexError, TypeError):
-                                val = ''
-                        mapping[item] = val
-                    video_width = int(mapping['width']) if mapping['width'] != '' else 0
-                    video_height = int(mapping['height']) if mapping['height'] != '' else 0
-                    # cover_url
-                    if 'coverUrl' not in feeds[i]['photo']:
-                        cover_url = ""
-                    else:
-                        cover_url = feeds[i]['photo']['coverUrl']
-
-                    # user_name / avatar_url
-                    try:
-                        user_name = feeds[i]['author']['name']
-                        avatar_url = feeds[i]['author']['headerUrl']
-                        user_id = feeds[i]['author']['id']
-                    except Exception:
-                        user_name = ''
-                        avatar_url = ''
-                        user_id = ''
-                    video_url = feeds[i]['photo']['photoUrl']
-                    video_dict = {'video_title': video_title,
-                                  'video_id': video_id,
-                                  'play_cnt': play_cnt,
-                                  'comment_cnt': 0,
-                                  'like_cnt': like_cnt,
-                                  'share_cnt': 0,
-                                  'video_width': video_width,
-                                  'video_height': video_height,
-                                  'duration': duration,
-                                  'publish_time': publish_time,
-                                  'publish_time_stamp': publish_time_stamp,
-                                  'publish_time_str': publish_time_str,
-                                  'user_name': user_name,
-                                  'user_id': user_id,
-                                  'avatar_url': avatar_url,
-                                  'cover_url': cover_url,
-                                  'video_url': video_url,
-                                  'session': f"kuaishou{int(time.time())}"}
-
-                    rule_1 = cls.download_rule(video_dict, rule_dict_1)
-                    Common.logger(log_type, crawler).info(f"video_title:{video_title}")
-                    Common.logger(log_type, crawler).info(f"video_id:{video_id}\n")
-
-                    Common.logger(log_type, crawler).info(
-                        f"play_cnt:{video_dict['play_cnt']}{rule_dict_1['play_cnt']}, {eval(str(video_dict['play_cnt']) + str(rule_dict_1['play_cnt']))}")
-                    Common.logger(log_type, crawler).info(
-                        f"like_cnt:{video_dict['like_cnt']}{rule_dict_1['like_cnt']}, {eval(str(video_dict['like_cnt']) + str(rule_dict_1['like_cnt']))}")
-                    Common.logger(log_type, crawler).info(
-                        f"video_width:{video_dict['video_width']}{rule_dict_1['video_width']}, {eval(str(video_dict['video_width']) + str(rule_dict_1['video_width']))}")
-                    Common.logger(log_type, crawler).info(
-                        f"video_height:{video_dict['video_height']}{rule_dict_1['video_height']}, {eval(str(video_dict['video_height']) + str(rule_dict_1['video_height']))}")
-                    Common.logger(log_type, crawler).info(
-                        f"duration:{video_dict['duration']}{rule_dict_1['duration']}, {eval(str(video_dict['duration']) + str(rule_dict_1['duration']))}")
-                    Common.logger(log_type, crawler).info(
-                        f"publish_time:{video_dict['publish_time']}{rule_dict_1['publish_time']}, {eval(str(video_dict['publish_time']) + str(rule_dict_1['publish_time']))}")
-                    Common.logger(log_type, crawler).info(f"rule_1:{rule_1}\n")
-
-                    if video_title == "" or video_url == "":
-                        Common.logger(log_type, crawler).info("无效视频\n")
-                        continue
-                    elif rule_1 is True:
-                        cls.download_publish(log_type=log_type,
-                                             crawler=crawler,
-                                             strategy=strategy,
-                                             video_dict=video_dict,
-                                             rule_dict=rule_dict_1,
-                                             our_uid=our_uid,
-                                             oss_endpoint=oss_endpoint,
-                                             env=env,
-                                             machine=machine)
-
-                    else:
-                        Common.logger(log_type, crawler).info("不满足下载规则\n")
-
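The request loop above mounts `HTTPAdapter(max_retries=3)`, which only retries connection-level failures and does so immediately. urllib3's `Retry` (already a transitive dependency of `requests`) adds exponential backoff and 5xx retries; note the keyword is `allowed_methods` in urllib3 1.26+ (`method_whitelist` before that):

```python
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def build_session() -> requests.Session:
    retry = Retry(
        total=3,
        backoff_factor=1,  # exponential backoff between attempts
        status_forcelist=(500, 502, 503, 504),
        allowed_methods=("GET", "POST"),
    )
    session = requests.Session()
    adapter = HTTPAdapter(max_retries=retry)
    session.mount("http://", adapter)
    session.mount("https://", adapter)
    return session
```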
-    @classmethod
-    def repeat_video(cls, log_type, crawler, video_id, video_title, publish_time, env, machine):
-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}" or (platform="{cls.platform}" and video_title="{video_title}" and publish_time="{publish_time}") """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
-        return len(repeat_video)
-
-    @classmethod
-    def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
-        try:
-            filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
-            for filter_word in filter_words:
-                if filter_word in video_dict['video_title']:
-                    Common.logger(log_type, crawler).info(f"标题已中过滤词:{video_dict['video_title']}\n")
-                    return
-            download_finished = False
-            if cls.repeat_video(log_type, crawler, video_dict['video_id'], video_dict['video_title'],
-                                video_dict['publish_time_str'], env, machine) != 0:
-                Common.logger(log_type, crawler).info('视频已下载\n')
-            else:
-                # 下载视频
-                Common.download_method(log_type=log_type, crawler=crawler, text='video',
-                                       title=video_dict['video_title'], url=video_dict['video_url'])
-                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-                if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                    Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-                    return
-                # 下载封面
-                Common.download_method(log_type=log_type, crawler=crawler, text='cover',
-                                       title=video_dict['video_title'], url=video_dict['cover_url'])
-                # 保存视频信息至txt
-                Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-                # 上传视频
-                Common.logger(log_type, crawler).info("开始上传视频...")
-                our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                          crawler=crawler,
-                                                          strategy=strategy,
-                                                          our_uid=our_uid,
-                                                          env=env,
-                                                          oss_endpoint=oss_endpoint)
-                if env == 'dev':
-                    our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-                else:
-                    our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-                Common.logger(log_type, crawler).info("视频上传完成")
-
-                if our_video_id is None:
-                    Common.logger(log_type, crawler).warning(f"our_video_id:{our_video_id} 删除该视频文件夹")
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-                    return download_finished
-
-                # 视频信息保存数据库
-                insert_sql = f""" insert into crawler_video(video_id,
-                                                        user_id,
-                                                        out_user_id,
-                                                        platform,
-                                                        strategy,
-                                                        out_video_id,
-                                                        video_title,
-                                                        cover_url,
-                                                        video_url,
-                                                        duration,
-                                                        publish_time,
-                                                        play_cnt,
-                                                        crawler_rule,
-                                                        width,
-                                                        height)
-                                                        values({our_video_id},
-                                                        {our_uid},
-                                                        "{video_dict['user_id']}",
-                                                        "{cls.platform}",
-                                                        "{strategy}",
-                                                        "{video_dict['video_id']}",
-                                                        "{video_dict['video_title']}",
-                                                        "{video_dict['cover_url']}",
-                                                        "{video_dict['video_url']}",
-                                                        {int(video_dict['duration'])},
-                                                        "{video_dict['publish_time_str']}",
-                                                        {int(video_dict['play_cnt'])},
-                                                        '{json.dumps(rule_dict)}',
-                                                        {int(video_dict['video_width'])},
-                                                        {int(video_dict['video_height'])}) """
-                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-                MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
-                Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
-
-                # 视频写入飞书
-                Feishu.insert_columns(log_type, 'kuaishou', "Aps2BI", "ROWS", 1, 2)
-                upload_time = int(time.time())
-                values = [[our_video_id,
-                           time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                           strategy,
-                           str(video_dict['video_id']),
-                           video_dict['video_title'],
-                           our_video_link,
-                           video_dict['play_cnt'],
-                           video_dict['comment_cnt'],
-                           video_dict['like_cnt'],
-                           video_dict['share_cnt'],
-                           video_dict['duration'],
-                           f"{video_dict['video_width']}*{video_dict['video_height']}",
-                           video_dict['publish_time_str'],
-                           video_dict['user_name'],
-                           video_dict['user_id'],
-                           video_dict['avatar_url'],
-                           video_dict['cover_url'],
-                           video_dict['video_url']]]
-                time.sleep(1)
-                Feishu.update_values(log_type, 'kuaishou', "Aps2BI", "E2:Z2", values)
-                Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
-                download_finished = True
-            return download_finished
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"download_publish:{e}\n")
-
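Both `download_publish` variants delete `./{crawler}/videos/{md_title}` by hand on every failure path, and the title/md5 mismatch fixed above shows how easily those paths drift apart. A sketch of centralizing the cleanup in one place, assuming the same on-disk layout:

```python
import shutil
from contextlib import contextmanager
from hashlib import md5

@contextmanager
def video_workdir(crawler: str, video_title: str):
    """Yield the working directory; remove it if the block raises."""
    md_title = md5(video_title.encode("utf8")).hexdigest()
    path = f"./{crawler}/videos/{md_title}"
    try:
        yield path
    except Exception:
        shutil.rmtree(path, ignore_errors=True)
        raise
```

Each early-return branch then becomes a raised exception, and the directory is removed in exactly one place.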
-
-if __name__ == "__main__":
-    KuaiShouRecommend.get_videoList('recommend', 'kuaishou', '推荐抓取策略', 55440319, 'outer', 'prod', 'aliyun')

+ 0 - 213
main/process.sh

@@ -24,190 +24,6 @@ echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量..." >> ${log_path}
 cd ~ && source /etc/profile
 cd ~ && source /etc/profile
 echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成!" >> ${log_path}
 echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成!" >> ${log_path}
 
 
-## 公众号爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略 1-100个账号 进程状态" >> ${log_path}
-#ps -ef | grep "run_gongzhonghao_follow.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="author" --crawler="gongzhonghao" --env="dev" gongzhonghao/logs/nohup-follow.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="author" --crawler="gongzhonghao" --env="prod"  gongzhonghao/logs/nohup-follow.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略1-100个账号 进程状态正常" >> ${log_path}
-#fi
-#
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略101-145个账号 进程状态" >> ${log_path}
-#ps -ef | grep "run_gongzhonghao_follow_2.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="dev" gongzhonghao/logs/nohup-follow-2.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/logs/nohup-follow-2.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略101-145个账号 进程状态正常" >> ${log_path}
-#fi
-
-## 小年糕定向爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 小年糕定向爬虫策略 进程状态" >> ${log_path}
-#ps -ef | grep "run_xiaoniangao_follow.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="author" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-follow.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="author" --crawler="xiaoniangao" --env="prod"  xiaoniangao/logs/nohup-follow.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕定向爬虫策略 进程状态正常" >> ${log_path}
-#fi
-
-## 小年糕小时榜爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 小年糕小时榜爬虫策略 进程状态" >> ${log_path}
-#ps -ef | grep "run_xiaoniangao_hour.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-hour.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="prod" xiaoniangao/logs/nohup-hour.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕小时榜爬虫策略 进程状态正常" >> ${log_path}
-#fi
-
-## 小年糕播放量榜爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 播放量榜爬虫策略 进程状态" >> ${log_path}
-#ps -ef | grep "run_xiaoniangao_play.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-play.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="prod" xiaoniangao/logs/nohup-play.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 播放量榜爬虫策略 进程状态正常" >> ${log_path}
-#fi
-
-
-## 快手定向爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 快手定向爬虫策略 进程状态" >> ${log_path}
-#ps -ef | grep "run_kuaishou_follow.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="author" --crawler="kuaishou" --env="dev" kuaishou/logs/nohup-follow.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="author" --crawler="kuaishou" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/logs/nohup-follow.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 快手定向爬虫策略 进程状态正常" >> ${log_path}
-#fi
-
-## 快手推荐爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 快手推荐爬虫策略 进程状态" >> ${log_path}
-#ps -ef | grep "run_kuaishou_recommend.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="author" --crawler="kuaishou" --env="dev" kuaishou/logs/nohup-recommend.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_recommend.py --log_type="recommend" --crawler="kuaishou" --strategy="推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/logs/nohup-recommend.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 快手推荐爬虫策略 进程状态正常" >> ${log_path}
-#fi
-
-## 抖音推荐爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 抖音推荐爬虫策略 进程状态" >> ${log_path}
-#ps -ef | grep "run_douyin_recommend.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="author" --crawler="kuaishou" --env="dev" douyin/logs/nohup-recommend.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./douyin/douyin_main/run_douyin_recommend.py --log_type="recommend" --crawler="douyin" --strategy="抖音推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" douyin/logs/nohup-recommend.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 抖音推荐爬虫策略 进程状态正常" >> ${log_path}
-#fi
-
-## 抖音定向爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 抖音定向爬虫策略 进程状态" >> ${log_path}
-#ps -ef | grep "run_douyin_follow.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/douyin_main/run_douyin_follow.py --log_type="author" --crawler="douyin" --env="dev" douyin/logs/nohup-follow.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./douyin/douyin_main/run_douyin_follow.py --log_type="author" --crawler="douyin" --strategy="抖音定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" douyin/logs/nohup-author.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 抖音推荐爬虫策略 进程状态正常" >> ${log_path}
-#fi
-
-## 西瓜定向爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 西瓜定向爬虫策略 进程状态" >> ${log_path}
-#ps -ef | grep "run_xigua_follow.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="author" --crawler="xigua" --env="dev" xigua/logs/nohup-follow.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="author" --crawler="xigua" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" xigua/logs/nohup-follow.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 西瓜定向爬虫策略 进程状态正常" >> ${log_path}
-#fi
-
-## 西瓜推荐榜爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 西瓜推荐榜爬虫策略 进程状态" >> ${log_path}
-#ps -ef | grep "run_xigua_recommend.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_recommend.py --log_type="recommend" --crawler="xigua" --env="dev" xigua/logs/nohup-recommend.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_recommend.py --log_type="recommend" --crawler="xigua" --env="prod" xigua/logs/nohup-recommend.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 西瓜推荐榜爬虫策略 进程状态正常" >> ${log_path}
-#fi
-
-## 西瓜搜索爬虫策略
-#if [[ "$time" > "00:00:00" ]] && [[ "$time" < "00:10:00" ]]; then
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 西瓜搜索爬虫策略 进程状态" >> ${log_path}
-#  ps -ef | grep "run_xigua_search_new" | grep -v "grep"
-#  if [ "$?" -eq 1 ];then
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 西瓜搜索爬虫策略, 异常停止, 正在重启!" >> ${log_path}
-#    if [ ${env} = "dev" ];then
-#      cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_search_new.py --log_type="search" --crawler="xigua" --env="dev" xigua/logs/nohup-search.log
-#    else
-#      cd ${piaoquan_crawler_dir} && /usr/bin/sh main/scheduling_main.sh ./xigua/xigua_main/run_xigua_search_new.py --log_type="search" --crawler="xigua" --env="prod" xigua/logs/nohup-search.log
-#    fi
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#  else
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 西瓜搜索爬虫策略 进程状态正常" >> ${log_path}
-#  fi
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 不在任务启动时间范围: 西瓜搜索爬虫" >> ${log_path}
-#fi
-
 # youtube定向爬虫策略
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 youtube定向爬虫策略 进程状态" >> ${log_path}
 ps -ef | grep "run_youtube_follow.py" | grep -v "grep"
@@ -223,35 +39,6 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") youtube定向爬虫策略 进程状态正常" >> ${log_path}
   echo "$(date "+%Y-%m-%d %H:%M:%S") youtube定向爬虫策略 进程状态正常" >> ${log_path}
 fi
 fi
 
 
-## 本山祝福小程序爬虫
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 本山祝福小程序爬虫 进程状态" >> ${log_path}
-#ps -ef | grep "run_benshanzhufu" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py --log_type="recommend" --crawler="benshanzhufu" --env="dev" benshanzhufu/logs/nohup-recommend.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./benshanzhufu/benshanzhufu_main/run_benshanzhufu_recommend.py --log_type="recommend" --crawler="benshanzhufu" --env="prod"  benshanzhufu/logs/nohup-recommend.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 本山祝福小程序爬虫 进程状态正常" >> ${log_path}
-#fi
-
-## 岁岁年年迎福气小程序爬虫
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 岁岁年年迎福气小程序爬虫 进程状态" >> ${log_path}
-#ps -ef | grep "run_suisuiniannianyingfuqi" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py --log_type="recommend" --crawler="suisuiniannianyingfuqi" --env="dev" suisuiniannianyingfuqi/logs/nohup-recommend.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py --log_type="recommend" --crawler="suisuiniannianyingfuqi" --env="prod"  suisuiniannianyingfuqi/logs/nohup-recommend.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 岁岁年年迎福气小程序爬虫 进程状态正常" >> ${log_path}
-#fi

 # 微信指数监控
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 微信指数 bot 爬虫 进程状态" >> ${log_path}

+ 0 - 1
main/process_mq.sh

@@ -37,7 +37,6 @@ cd ${piaoquan_crawler_dir} && git pull origin master --force
 echo "$(date "+%Y-%m-%d %H:%M:%S") 代码更新完成!" >> ${log_path}
 echo "$(date "+%Y-%m-%d %H:%M:%S") 代码更新完成!" >> ${log_path}
 
 
 # ====================接入爬虫平台,且调用MQ进程检测====================
 # ====================接入爬虫平台,且调用MQ进程检测====================
-# 岁岁年年迎福气
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 ${crawler}_${log_type} 进程状态" >> ${log_path}
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 ${crawler}_${log_type} 进程状态" >> ${log_path}
 ps -ef | grep "run_${crawler}_${log_type}.py" | grep -v "grep"
 ps -ef | grep "run_${crawler}_${log_type}.py" | grep -v "grep"
 if [ "$?" -eq 1 ];then
 if [ "$?" -eq 1 ];then

+ 0 - 24
suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend.py

@@ -1,24 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/4/13
-import argparse
-import os
-import sys
-sys.path.append(os.getcwd())
-from common.common import Common
-from suisuiniannianyingfuqi.suisuiniannianyingfuqi_recommend.suisuiniannianyingfuqi_recommend import SuisuiniannianyingfuqiRecommend
-
-def main(log_type, crawler, env):
-    Common.logger(log_type, crawler).info('开始抓取 岁岁年年迎福气小程序\n')
-    SuisuiniannianyingfuqiRecommend.get_videoList(log_type, crawler, env)
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('抓取完一轮\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
-    parser.add_argument('--crawler')  ## 添加参数
-    parser.add_argument('--env')  ## 添加参数
-    args = parser.parse_args()  ### 参数赋值,也可以通过终端赋值
-    main(log_type=args.log_type, crawler=args.crawler, env=args.env)

+ 0 - 49
suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_suisuiniannianyingfuqi_recommend_scheduling.py

@@ -1,49 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/4/13
-import argparse
-import os
-import random
-import sys
-sys.path.append(os.getcwd())
-from common.public import task_fun
-from common.common import Common
-from common.scheduling_db import MysqlHelper
-from suisuiniannianyingfuqi.suisuiniannianyingfuqi_recommend.suisuiniannianyingfuqi_recommend_scheduling import SuisuiniannianyingfuqiRecommendScheduling
-
-
-def main(log_type, crawler, task, env):
-    task_dict = task_fun(task)['task_dict']
-    rule_dict = task_fun(task)['rule_dict']
-    task_id = task_dict['task_id']
-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
-    our_uid_list = []
-    for user in user_list:
-        our_uid_list.append(user["uid"])
-    our_uid = random.choice(our_uid_list)
-    Common.logger(log_type, crawler).info(f"调度任务:\n{task_dict}")
-    Common.logger(log_type, crawler).info(f"抓取规则:\n{rule_dict}")
-    Common.logger(log_type, crawler).info(f"用户列表:\n{user_list}")
-    Common.logger(log_type, crawler).info('开始抓取 岁岁年年迎福气小程序\n')
-    SuisuiniannianyingfuqiRecommendScheduling.get_videoList(log_type=log_type,
-                                                            crawler=crawler,
-                                                            our_uid=our_uid,
-                                                            rule_dict=rule_dict,
-                                                            env=env)
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('抓取完一轮\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the CLI argument parser
-    parser.add_argument('--log_type', type=str)  # add an argument, with its type
-    parser.add_argument('--crawler')  # add an argument
-    parser.add_argument('--task')  # add an argument
-    # parser.add_argument('--oss_endpoint')  # add an argument
-    parser.add_argument('--env')  # add an argument
-    args = parser.parse_args()  # parse the values passed on the command line
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         task=args.task,
-         env=args.env)

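For context, `task_fun` (imported from `common.public`) is what splits the `--task` argument into the `task_dict` / `rule_dict` pair the scheduler consumes. Its real implementation lives outside this diff; the sketch below only illustrates the assumed shape (a JSON task payload whose `rule` field is itself JSON-encoded), so the field names here are assumptions:

```python
# Assumed shape only -- the real task_fun is defined in common/public.py.
import json

def task_fun_sketch(task: str) -> dict:
    task_dict = json.loads(task)                       # e.g. {"task_id": 1, "rule": "[...]"}
    rule_list = json.loads(task_dict.get("rule", "[]"))
    rule_dict = {}
    for item in rule_list:                             # e.g. [{"play_cnt": {"min": 4000}}, ...]
        rule_dict.update(item)
    return {"task_dict": task_dict, "rule_dict": rule_dict}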
+ 0 - 214
suisuiniannianyingfuqi/suisuiniannianyingfuqi_recommend/suisuiniannianyingfuqi_recommend.py

@@ -1,214 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/4/13
-import json
-import os
-import random
-import shutil
-import sys
-import time
-from hashlib import md5
-import requests
-import urllib3
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.feishu import Feishu
-from common.publish import Publish
-from common.scheduling_db import MysqlHelper
-# from common.public import download_rule
-
-
-class SuisuiniannianyingfuqiRecommend:
-    platform = "岁岁年年迎福气"
-
-    @classmethod
-    def repeat_video(cls, log_type, crawler, video_id, env):
-        sql = f""" select * from crawler_video where platform="岁岁年年迎福气" and out_video_id="{video_id}"; """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
-        return len(repeat_video)
-
-    @classmethod
-    def get_videoList(cls, log_type, crawler, env):
-        page = 1
-        while True:
-            try:
-                url = 'https://www.jzkksp.com/index/home/get_home_list.html'
-                headers = {
-                    'content-type': 'application/x-www-form-urlencoded',
-                    'Accept-Encoding': 'gzip,compress,br,deflate',
-                    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) '
-                                  'AppleWebKit/605.1.15 (KHTML, like Gecko) '
-                                  'Mobile/15E148 MicroMessenger/8.0.25(0x1800192b) NetType/WIFI Language/zh_CN',
-                    'Referer': 'https://servicewechat.com/wxd4c54f60812f6f36/1/page-frame.html',
-                }
-                data = {
-                    'token': '851ae159fd33f955bf433e7c47a4a298',
-                    'time': '1667905857000',
-                    'str_data': 'uT551tU8',
-                    'page': str(page),
-                    'limit': '10',
-                    'appid': 'wxd4c54f60812f6f36',
-                    'version': '1.4.1',
-                    'openid': 'oDAjy5SCFe7Ml3PNgiow3ncozL1o'
-                }
-                urllib3.disable_warnings()
-                response = requests.post(url=url, headers=headers, data=data, verify=False)
-                page += 1
-                if response.status_code != 200:
-                    Common.logger(log_type, crawler).warning(f'get_videoList:{response.status_code}, {response.text}\n')
-                    return
-                elif 'data' not in response.json():
-                    Common.logger(log_type, crawler).warning(f'get_videoList:{response.status_code}, {response.json()}\n')
-                    return
-                elif len(response.json()['data']['video_list']['data']) == 0:
-                    Common.logger(log_type, crawler).info(f'没有更多数据啦~ {response.json()}\n')
-                    return
-                else:
-                    feeds = response.json()['data']['video_list']['data']
-                    for i in range(len(feeds)):
-                        try:
-                            publish_time_str = feeds[i].get('createtime', '')
-                            publish_time_stamp = int(time.mktime(time.strptime(publish_time_str, "%Y-%m-%d")))
-                            video_dict = {'video_title': feeds[i].get('title', "").replace("'", "").replace('"', ''),
-                                          'video_id': str(feeds[i].get('id', '')),
-                                          'play_cnt': feeds[i].get('browse', 0),
-                                          'comment_cnt': 0,
-                                          'like_cnt': 0,
-                                          'share_cnt': 0,
-                                          'publish_time_stamp': publish_time_stamp,
-                                          'publish_time_str': publish_time_str,
-                                          'user_name': "岁岁年年迎福气",
-                                          'user_id': "suisuiniannianyingfuqi",
-                                          'avatar_url': feeds[i].get('thumb', ''),
-                                          'cover_url': feeds[i].get('thumb', ''),
-                                          'video_url': feeds[i].get('url', ''),
-                                          'session': f"suisuiniannianyingfuqi-{int(time.time())}"}
-                            for k, v in video_dict.items():
-                                Common.logger(log_type, crawler).info(f"{k}:{v}")
-
-                            if video_dict["video_id"] == '' or video_dict["video_title"] == '' or video_dict["cover_url"] == '' or video_dict["video_url"] == '':
-                                Common.logger(log_type, crawler).info('无效视频\n')
-                            elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
-                                Common.logger(log_type, crawler).info('视频已下载\n')
-                            else:
-                                cls.download_publish(log_type, crawler, video_dict, env)
-                        except Exception as e:
-                            Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
-            except Exception as e:
-                Common.logger(log_type, crawler).error(f"抓取第{page}页时异常:{e}\n")
-
-    # Download / upload
-    @classmethod
-    def download_publish(cls, log_type, crawler, video_dict, env):
-        # 下载视频
-        Common.download_method(log_type=log_type, crawler=crawler, text='video', title=video_dict['video_title'], url=video_dict['video_url'])
-        md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-        try:
-            if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
-                # 删除视频文件夹
-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-                return
-        except FileNotFoundError:
-            # 删除视频文件夹
-            shutil.rmtree(f"./{crawler}/videos/{md_title}")
-            Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
-            return
-
-        ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{md_title}/video.mp4")
-        video_dict["duration"] = ffmpeg_dict["duration"]
-        video_dict["video_width"] = ffmpeg_dict["width"]
-        video_dict["video_height"] = ffmpeg_dict["height"]
-
-        # 下载封面
-        Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'], url=video_dict['cover_url'])
-        # 保存视频信息至txt
-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-        if env == "dev":
-            oss_endpoint = "out"
-        else:
-            oss_endpoint = "inner"
-
-        select_user_sql = f"""select * from crawler_user_v3 where source="suisuiniannianyingfuqi" """
-        user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
-        our_uid_list = []
-        for user in user_list:
-            our_uid_list.append(user["uid"])
-        our_uid = random.choice(our_uid_list)
-
-        # 上传视频
-        Common.logger(log_type, crawler).info("开始上传视频...")
-        our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                  crawler=crawler,
-                                                  strategy="推荐榜爬虫策略",
-                                                  our_uid=our_uid,
-                                                  env=env,
-                                                  oss_endpoint=oss_endpoint)
-        if env == 'dev':
-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        else:
-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        Common.logger(log_type, crawler).info("视频上传完成")
-
-        if our_video_id is None:
-            try:
-                # 删除视频文件夹
-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                return
-            except FileNotFoundError:
-                return
-
-        # 视频写入飞书
-        Feishu.insert_columns(log_type, crawler, "290bae", "ROWS", 1, 2)
-        upload_time = int(time.time())
-        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                   "推荐榜爬虫策略",
-                   video_dict['video_title'],
-                   video_dict['video_id'],
-                   our_video_link,
-                   video_dict['play_cnt'],
-                   video_dict['duration'],
-                   f"{video_dict['video_width']}*{video_dict['video_height']}",
-                   video_dict['cover_url'],
-                   video_dict['video_url']]]
-        time.sleep(0.5)
-        Feishu.update_values(log_type, crawler, "290bae", "F2:Z2", values)
-        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
-
-        rule_dict = {}
-        # 视频信息保存数据库
-        insert_sql = f""" insert into crawler_video(video_id,
-                                                out_user_id,
-                                                platform,
-                                                strategy,
-                                                out_video_id,
-                                                video_title,
-                                                cover_url,
-                                                video_url,
-                                                duration,
-                                                publish_time,
-                                                play_cnt,
-                                                crawler_rule,
-                                                width,
-                                                height)
-                                                values({our_video_id},
-                                                "{video_dict['user_id']}",
-                                                "{cls.platform}",
-                                                "推荐榜爬虫策略",
-                                                "{video_dict['video_id']}",
-                                                "{video_dict['video_title']}",
-                                                "{video_dict['cover_url']}",
-                                                "{video_dict['video_url']}",
-                                                {int(video_dict['duration'])},
-                                                "{video_dict['publish_time_str']}",
-                                                {int(video_dict['play_cnt'])},
-                                                '{json.dumps(rule_dict)}',
-                                                {int(video_dict['video_width'])},
-                                                {int(video_dict['video_height'])}) """
-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
-        Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
-
-
-if __name__ == '__main__':
-    pass

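The insert above builds its SQL by f-string interpolation, which is why titles have to be pre-stripped of quote characters. A parameterized version (a sketch using plain PyMySQL, not the project's `MysqlHelper` API) avoids both the stripping and the injection risk:

```python
# Sketch only: parameterized insert via PyMySQL, not the project's MysqlHelper.
import json
import pymysql

def insert_video(conn, our_video_id, video_dict, rule_dict, platform="岁岁年年迎福气"):
    sql = """insert into crawler_video(video_id, out_user_id, platform, strategy,
                 out_video_id, video_title, cover_url, video_url, duration,
                 publish_time, play_cnt, crawler_rule, width, height)
             values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    with conn.cursor() as cursor:
        cursor.execute(sql, (our_video_id, video_dict["user_id"], platform,
                             "推荐榜爬虫策略", video_dict["video_id"],
                             video_dict["video_title"], video_dict["cover_url"],
                             video_dict["video_url"], int(video_dict["duration"]),
                             video_dict["publish_time_str"], int(video_dict["play_cnt"]),
                             json.dumps(rule_dict), int(video_dict["video_width"]),
                             int(video_dict["video_height"])))
    conn.commit()

# conn = pymysql.connect(host="...", user="...", password="...", database="...")
```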
+ 0 - 152
weixinzhishu/weixinzhishu_main/weixinzhishu_inner_long.py

@@ -1,152 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/28
-import json
-import os
-import sys
-import time
-from datetime import date, timedelta
-import requests
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.feishu import Feishu
-proxies = {"http": None, "https": None}
-
-
-class Test:
-    # 获取微信 key / openid
-    @classmethod
-    def get_wechat_key(cls, log_type, crawler):
-        """
-        获取微信 key / openid
-        https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
-        :param log_type: 日志名
-        :param crawler: 哪款爬虫,填写:weixinzhishu
-        :return: search_key, openid
-        """
-        try:
-            # while True:
-            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
-                # if sheet is None:
-                #     Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ,10秒钟后重试")
-                #     time.sleep(10)
-                # else:
-                #     break
-            # row 1 of the sheet holds the current search_key / openid
-            if sheet:
-                search_key = sheet[1][1]
-                openid = sheet[1][2]
-                return search_key, openid
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
-
-    @classmethod
-    def get_words(cls, log_type, crawler):
-        try:
-            while True:
-                sheet = Feishu.get_values_batch(log_type, crawler, 'X6K0vN')
-                if sheet is None:
-                    Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ,10秒钟后重试")
-                    time.sleep(10)
-                else:
-                    break
-            word_list = []
-            for i in range(len(sheet)):
-                word_dict = {
-                    "title": sheet[i][0],
-                    "word": sheet[i][1]
-                }
-                word_list.append(word_dict)
-            return word_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_words:{e}\n")
-
-    @classmethod
-    def get_score_test(cls, log_type, crawler):
-
-        start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
-        end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
-
-        word_list = cls.get_words(log_type, crawler)
-        for i in range(len(word_list)):
-            Common.logger(log_type, crawler).info(f"热词: {word_list[i]['word']}")
-            while True:
-                wechat_key = cls.get_wechat_key(log_type, crawler)
-                if wechat_key is None:
-                    Common.logger(log_type, crawler).info(
-                        f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} auth 过期,休眠 10 秒,重新获取")
-                    time.sleep(10)
-                    continue
-
-                search_key = wechat_key[0]
-                openid = wechat_key[-1]
-                url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
-                payload = json.dumps({
-                    "openid": openid,
-                    "search_key": search_key,
-                    "cgi_name": "GetDefaultIndex",
-                    "start_ymd": start_ymd,
-                    "end_ymd": end_ymd,
-                    "query": word_list[i]['word']
-                })
-                headers = {
-                    'Host': 'search.weixin.qq.com',
-                    'content-type': 'application/json',
-                    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
-                    'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
-                }
-                response = requests.request("POST", url, headers=headers, data=payload, proxies=proxies)
-                if response.json()['code'] == -10000:
-                    Common.logger(log_type, crawler).info(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} response:{response.json()['msg']} 休眠 10 秒,重新获取")
-                    time.sleep(10)
-                    continue
-
-                wechat_score_list = []
-                word_wechat_score_dict = {
-                    "id": i+1,
-                    "word": word_list[i]['word'],
-                    "wechatScores": wechat_score_list,
-                }
-                if response.json()['code'] == -10002:
-                    Common.logger(log_type, crawler).info("该词暂未收录")
-                    # # 写飞书
-                    # if word_list[i]['word'] in [x for y in Feishu.get_values_batch(log_type, crawler, "JpgyAv") for x in y]:
-                    #     Common.logger(log_type, crawler).info("该词已存在")
-                    #     continue
-                    Feishu.insert_columns(log_type, crawler, "JpgyAv", "ROWS", 1, 2)
-                    time.sleep(0.5)
-                    Feishu.update_values(log_type, crawler, "JpgyAv", "F2:Z2",
-                                         [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
-                                           word_list[i]['title'],
-                                           word_list[i]['word'],
-                                           "",
-                                           "该词暂未收录"]])
-                    Common.logger(log_type, crawler).info("写入飞书成功\n")
-                elif response.json()['code'] != 0:
-                    Common.logger(log_type, crawler).warning(f"{word_wechat_score_dict}")
-                    continue
-                else:
-                    time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
-                    for x in range(len(time_index)):
-                        Common.logger(log_type, crawler).info(f"正在更新 {word_list[i]['word']}")
-                        score_time = time_index[x]['time']
-                        score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
-                        score = time_index[x]['score']
-                        wechat_score_dict = {"score": score, "scoreDate": score_time_str}
-                        wechat_score_list.append(wechat_score_dict)
-                        Common.logger(log_type, crawler).info(f"wechat_score_dict:{wechat_score_dict}")
-                        Feishu.insert_columns(log_type, crawler, "JpgyAv", "ROWS", 1, 2)
-                        time.sleep(1)
-                        Feishu.update_values(log_type, crawler, "JpgyAv", "F2:Z2", [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
-                                                                               word_list[i]['title'],
-                                                                               word_list[i]['word'],
-                                                                               score_time_str,
-                                                                               score]])
-                        Common.logger(log_type, crawler).info("写入飞书成功\n")
-                break
-
-        Feishu.bot(log_type, "weixinzhishu_inner_long", "微信指数_站内长期指数抓取完毕")
-
-
-if __name__ == "__main__":
-    Test.get_score_test("inner-long", "weixinzhishu")
-    pass

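The response handling above walks `content.resp_list[0].indexes[0].time_indexes` and turns each `time`/`score` pair into a dated row. Extracted as a small standalone helper (same JSON paths as `get_score_test` uses):

```python
# Standalone version of the score extraction used in get_score_test above.
def parse_time_indexes(resp_json: dict) -> list:
    time_indexes = resp_json["content"]["resp_list"][0]["indexes"][0]["time_indexes"]
    rows = []
    for item in time_indexes:
        t = str(item["time"])                     # e.g. 20230415
        score_date = f"{t[:4]}-{t[4:6]}-{t[6:]}"  # -> "2023-04-15"
        rows.append((score_date, item["score"]))
    return rows
```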
+ 0 - 152
weixinzhishu/weixinzhishu_main/weixinzhishu_inner_sort.py

@@ -1,152 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/28
-import json
-import os
-import sys
-import time
-from datetime import date, timedelta
-import requests
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.feishu import Feishu
-proxies = {"http": None, "https": None}
-
-
-class Test:
-    # 获取微信 key / openid
-    @classmethod
-    def get_wechat_key(cls, log_type, crawler):
-        """
-        获取微信 key / openid
-        https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
-        :param log_type: 日志名
-        :param crawler: 哪款爬虫,填写:weixinzhishu
-        :return: search_key, openid
-        """
-        try:
-            # while True:
-            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
-                # if sheet is None:
-                #     Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ,10秒钟后重试")
-                #     time.sleep(10)
-                # else:
-                #     break
-            # row 1 of the sheet holds the current search_key / openid
-            if sheet:
-                search_key = sheet[1][1]
-                openid = sheet[1][2]
-                return search_key, openid
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
-
-    @classmethod
-    def get_words(cls, log_type, crawler):
-        try:
-            while True:
-                sheet = Feishu.get_values_batch(log_type, crawler, 'D9IqTp')
-                if sheet is None:
-                    Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ,10秒钟后重试")
-                    time.sleep(10)
-                else:
-                    break
-            word_list = []
-            for i in range(len(sheet)):
-                word_dict = {
-                    "title": sheet[i][0],
-                    "word": sheet[i][1]
-                }
-                word_list.append(word_dict)
-            return word_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_words:{e}\n")
-
-    @classmethod
-    def get_score_test(cls, log_type, crawler):
-
-        start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
-        end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
-
-        word_list = cls.get_words(log_type, crawler)
-        for i in range(len(word_list)):
-            Common.logger(log_type, crawler).info(f"热词: {word_list[i]['word']}")
-            while True:
-                wechat_key = cls.get_wechat_key(log_type, crawler)
-                if wechat_key is None:
-                    Common.logger(log_type, crawler).info(
-                        f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} auth 过期,休眠 10 秒,重新获取")
-                    time.sleep(10)
-                    continue
-
-                search_key = wechat_key[0]
-                openid = wechat_key[-1]
-                url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
-                payload = json.dumps({
-                    "openid": openid,
-                    "search_key": search_key,
-                    "cgi_name": "GetDefaultIndex",
-                    "start_ymd": start_ymd,
-                    "end_ymd": end_ymd,
-                    "query": word_list[i]['word']
-                })
-                headers = {
-                    'Host': 'search.weixin.qq.com',
-                    'content-type': 'application/json',
-                    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
-                    'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
-                }
-                response = requests.request("POST", url, headers=headers, data=payload, proxies=proxies)
-                if response.json()['code'] == -10000:
-                    Common.logger(log_type, crawler).info(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} response:{response.json()['msg']} 休眠 10 秒,重新获取")
-                    time.sleep(10)
-                    continue
-
-                wechat_score_list = []
-                word_wechat_score_dict = {
-                    "id": i+1,
-                    "word": word_list[i]['word'],
-                    "wechatScores": wechat_score_list,
-                }
-                if response.json()['code'] == -10002:
-                    Common.logger(log_type, crawler).info("该词暂未收录")
-                    # # 写飞书
-                    # if word_list[i]['word'] in [x for y in Feishu.get_values_batch(log_type, crawler, "DrZHpa") for x in y]:
-                    #     Common.logger(log_type, crawler).info("该词已存在")
-                    #     continue
-                    Feishu.insert_columns(log_type, crawler, "DrZHpa", "ROWS", 1, 2)
-                    time.sleep(0.5)
-                    Feishu.update_values(log_type, crawler, "DrZHpa", "F2:Z2",
-                                         [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
-                                           word_list[i]['title'],
-                                           word_list[i]['word'],
-                                           "",
-                                           "该词暂未收录"]])
-                    Common.logger(log_type, crawler).info("写入飞书成功\n")
-                elif response.json()['code'] != 0:
-                    Common.logger(log_type, crawler).warning(f"{word_wechat_score_dict}")
-                    continue
-                else:
-                    time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
-                    for x in range(len(time_index)):
-                        Common.logger(log_type, crawler).info(f"正在更新 {word_list[i]['word']}")
-                        score_time = time_index[x]['time']
-                        score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
-                        score = time_index[x]['score']
-                        wechat_score_dict = {"score": score, "scoreDate": score_time_str}
-                        wechat_score_list.append(wechat_score_dict)
-                        Common.logger(log_type, crawler).info(f"wechat_score_dict:{wechat_score_dict}")
-                        Feishu.insert_columns(log_type, crawler, "DrZHpa", "ROWS", 1, 2)
-                        time.sleep(1)
-                        Feishu.update_values(log_type, crawler, "DrZHpa", "F2:Z2", [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
-                                                                                     word_list[i]['title'],
-                                                                                     word_list[i]['word'],
-                                                                                     score_time_str,
-                                                                                     score]])
-                        Common.logger(log_type, crawler).info("写入飞书成功\n")
-                break
-
-        Feishu.bot(log_type, "weixinzhishu_inner_sort", "微信指数_站内短期指数抓取完毕")
-
-
-if __name__ == "__main__":
-    Test.get_score_test("inner-sort", "weixinzhishu")
-    pass

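These weixinzhishu scripts all query the same rolling window: from seven days ago through today, formatted as `YYYYMMDD`. Isolated, the computation is just:

```python
from datetime import date, timedelta

start_ymd = (date.today() - timedelta(days=7)).strftime("%Y%m%d")  # seven days back
end_ymd = date.today().strftime("%Y%m%d")                          # today
```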
+ 0 - 153
weixinzhishu/weixinzhishu_main/weixinzhishu_out.py

@@ -1,153 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/28
-import json
-import os
-import sys
-import time
-from datetime import date, timedelta
-import requests
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.feishu import Feishu
-proxies = {"http": None, "https": None}
-
-
-class Test:
-    # 获取微信 key / openid
-    @classmethod
-    def get_wechat_key(cls, log_type, crawler):
-        """
-        获取微信 key / openid
-        https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
-        :param log_type: 日志名
-        :param crawler: 哪款爬虫,填写:weixinzhishu
-        :return: search_key, openid
-        """
-        try:
-            # while True:
-            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
-                # if sheet is None:
-                #     Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ,10秒钟后重试")
-                #     time.sleep(10)
-                # else:
-                #     break
-            # row 1 of the sheet holds the current search_key / openid
-            if sheet:
-                search_key = sheet[1][1]
-                openid = sheet[1][2]
-                return search_key, openid
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
-
-    @classmethod
-    def get_words(cls, log_type, crawler):
-        try:
-            while True:
-                sheet = Feishu.get_values_batch(log_type, crawler, 'MvFi8s')
-                if sheet is None:
-                    Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ,10秒钟后重试")
-                    time.sleep(10)
-                else:
-                    break
-            word_list = []
-            for i in range(len(sheet)):
-                word_dict = {
-                    "title": sheet[i][0],
-                    "word": sheet[i][1]
-                }
-                word_list.append(word_dict)
-            return word_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_words:{e}\n")
-
-    @classmethod
-    def get_score_test(cls, log_type, crawler):
-
-        start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
-        end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
-
-        word_list = cls.get_words(log_type, crawler)
-        for i in range(len(word_list)):
-            Common.logger(log_type, crawler).info(f"热词: {word_list[i]['word']}")
-            while True:
-                wechat_key = cls.get_wechat_key(log_type, crawler)
-                if wechat_key is None:
-                    Common.logger(log_type, crawler).info(
-                        f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} auth 过期,休眠 10 秒,重新获取")
-                    time.sleep(10)
-                    continue
-
-                search_key = wechat_key[0]
-                openid = wechat_key[-1]
-                url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
-                payload = json.dumps({
-                    "openid": openid,
-                    "search_key": search_key,
-                    "cgi_name": "GetDefaultIndex",
-                    "start_ymd": start_ymd,
-                    "end_ymd": end_ymd,
-                    "query": word_list[i]['word']
-                })
-                headers = {
-                    'Host': 'search.weixin.qq.com',
-                    'content-type': 'application/json',
-                    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
-                    'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
-                }
-                response = requests.request("POST", url, headers=headers, data=payload, proxies=proxies)
-                if response.json()['code'] == -10000:
-                    Common.logger(log_type, crawler).info(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} response:{response.json()['msg']} 休眠 10 秒,重新获取")
-                    time.sleep(10)
-                    continue
-
-                wechat_score_list = []
-                word_wechat_score_dict = {
-                    "id": i+1,
-                    "word": word_list[i]['word'],
-                    "wechatScores": wechat_score_list,
-                }
-                if response.json()['code'] == -10002:
-                    Common.logger(log_type, crawler).info("该词暂未收录")
-                    # 写飞书
-                    # if word_list[i]['word'] in [x for y in Feishu.get_values_batch(log_type, crawler, "YVuVgQ") for x in y]:
-                    #     Common.logger(log_type, crawler).info("该词已存在")
-                    #     continue
-                    Feishu.insert_columns(log_type, crawler, "YVuVgQ", "ROWS", 1, 2)
-                    time.sleep(0.5)
-                    Feishu.update_values(log_type, crawler, "YVuVgQ", "F2:Z2",
-                                         [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
-                                           word_list[i]['title'],
-                                           word_list[i]['word'],
-                                           "",
-                                           "该词暂未收录"]])
-                    Common.logger(log_type, crawler).info("写入飞书成功\n")
-                elif response.json()['code'] != 0:
-                    Common.logger(log_type, crawler).warning(f"{word_wechat_score_dict}")
-                    continue
-                else:
-                    time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
-                    for x in range(len(time_index)):
-                        Common.logger(log_type, crawler).info(f"正在更新 {word_list[i]['word']}")
-                        score_time = time_index[x]['time']
-                        score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
-                        score = time_index[x]['score']
-                        wechat_score_dict = {"score": score, "scoreDate": score_time_str}
-                        wechat_score_list.append(wechat_score_dict)
-                        Common.logger(log_type, crawler).info(f"wechat_score_dict:{wechat_score_dict}")
-                        Feishu.insert_columns(log_type, crawler, "YVuVgQ", "ROWS", 1, 2)
-                        time.sleep(1)
-                        Feishu.update_values(log_type, crawler, "YVuVgQ", "F2:Z2", [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
-                                                                                     word_list[i]['title'],
-                                                                                     word_list[i]['word'],
-                                                                                     score_time_str,
-                                                                                     score]])
-                        Common.logger(log_type, crawler).info("写入飞书成功\n")
-                break
-
-        Feishu.bot(log_type, "weixinzhishu_out", "微信指数_站外指数抓取完毕")
-
-
-if __name__ == "__main__":
-    # print(Test.get_words("test", "weixinzhishu"))
-    Test.get_score_test("out", "weixinzhishu")
-    pass

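Each of the three scripts writes a score row with the same two Feishu calls: `insert_columns` to open a blank row, then `update_values` against `F2:Z2`. A shared helper would collapse that repetition; the sketch below is built only from the calls already used above, with `sheet_id` being `JpgyAv`, `DrZHpa`, or `YVuVgQ` depending on the script:

```python
# Sketch: one row-writer for the three scripts; uses only Feishu calls seen above.
import time
from common.feishu import Feishu

def write_score_row(log_type, crawler, sheet_id, title, word, score_date, score):
    Feishu.insert_columns(log_type, crawler, sheet_id, "ROWS", 1, 2)  # open a blank row
    time.sleep(0.5)
    Feishu.update_values(log_type, crawler, sheet_id, "F2:Z2",
                         [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                           title, word, score_date, score]])
```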
+ 0 - 3
xiaoniangao/xiaoniangao_follow/__init__.py

@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/3/13

File diff suppressed because it is too large
+ 0 - 69
xiaoniangao/xiaoniangao_follow/xiaoniangao_follow.py


+ 0 - 685
xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py

@@ -1,685 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/3/15
-import datetime
-import json
-import os
-import random
-import shutil
-import sys
-import time
-import requests
-import urllib3
-
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.feishu import Feishu
-from common.publish import Publish
-from common.scheduling_db import MysqlHelper
-from common.public import get_config_from_mysql
-
-proxies = {"http": None, "https": None}
-
-
-class XiaoniangaoHour:
-    platform = "小年糕"
-
-    words = "abcdefghijklmnopqrstuvwxyz0123456789"
-    uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
-    token = "".join(random.sample(words, 32))
-    uid_token_dict = {
-        "uid": uid,
-        "token": token
-    }
-
-    # Generate uid / token
-    @classmethod
-    def get_uid_token(cls):
-        words = "abcdefghijklmnopqrstuvwxyz0123456789"
-        uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
-        token = "".join(random.sample(words, 32))
-        uid_token_dict = {
-            "uid": uid,
-            "token": token
-        }
-        return uid_token_dict
-
-    # Basic threshold rule
-    @staticmethod
-    def download_rule(video_dict):
-        """
-        Basic rule a video must pass before it is downloaded.
-        :param video_dict: video info, dict
-        :return: True if every threshold is met, otherwise False
-        """
-        return (int(float(video_dict["duration"])) >= 40  # duration
-                and (int(video_dict["video_width"]) >= 0 or int(video_dict["video_height"]) >= 0)  # width or height
-                and int(video_dict["play_cnt"]) >= 4000  # play count
-                and int(video_dict["like_cnt"]) >= 0  # like count
-                and int(video_dict["share_cnt"]) >= 0  # share count
-                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * 10)  # published <= 10 days ago
-
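In effect, the thresholds that actually bite are duration ≥ 40s, play count ≥ 4000, and publication within the last 10 days; the width/height/like/share comparisons against 0 always pass for non-negative data. A quick illustrative check (standalone, assuming the class above is importable; values are made up):

```python
import time

sample = {"duration": 45, "video_width": 720, "video_height": 1280,
          "play_cnt": 5200, "like_cnt": 0, "share_cnt": 0,
          "publish_time_stamp": int(time.time()) - 2 * 24 * 3600}  # 2 days old
assert XiaoniangaoHour.download_rule(sample) is True
```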
-    # Emoji / punctuation pools used to decorate titles
-    @classmethod
-    def get_expression(cls):
-        # 表情列表
-        expression_list = ['📍', '⭕️', '🔥', '📣', '🎈', '⚡', '🔔', '🚩', '💢', '💎', '👉', '💓', '❗️', '🔴', '🔺', '♦️', '♥️', '👉',
-                           '👈', '🏆', '❤️\u200d🔥']
-        # 符号列表
-        char_list = ['...', '~~']
-        return expression_list, char_list
-
-    @classmethod
-    def repeat_video(cls, log_type, crawler, video_id, env):
-        sql = f""" select * from crawler_video where platform="小年糕" and out_video_id="{video_id}"; """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
-        return len(repeat_video)
-
-    @classmethod
-    def repeat_hour(cls, log_type, crawler, video_id, env):
-        sql = f""" select * from crawler_xiaoniangao_hour where platform="小年糕" and out_video_id="{video_id}"; """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
-        return len(repeat_video)
-
-    # Fetch the recommend feed list
-    @classmethod
-    def get_videoList(cls, log_type, crawler, env):
-        # try:
-        uid_token_dict = cls.uid_token_dict
-        url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
-        headers = {
-            # "x-b3-traceid": cls.hour_x_b3_traceid,
-            "x-b3-traceid": '1c403a4aa72e3c',
-            # "X-Token-Id": cls.hour_x_token_id,
-            "X-Token-Id": 'ab619e96d801f1567388629260aa68ec-1202200806',
-            # "uid": cls.hour_uid,
-            "uid": uid_token_dict['uid'],
-            "content-type": "application/json",
-            "Accept-Encoding": "gzip,compress,br,deflate",
-            "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
-                          ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
-                          'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
-            # "Referer": cls.hour_referer
-            "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/624/page-frame.html'
-        }
-        data = {
-            "log_params": {
-                "page": "discover_rec",
-                "common": {
-                    "brand": "iPhone",
-                    "device": "iPhone 11",
-                    "os": "iOS 14.7.1",
-                    "weixinver": "8.0.20",
-                    "srcver": "2.24.2",
-                    "net": "wifi",
-                    "scene": 1089
-                }
-            },
-            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
-            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
-            "share_width": 625,
-            "share_height": 500,
-            "ext": {
-                "fmid": 0,
-                "items": {}
-            },
-            "app": "xng",
-            "rec_scene": "discover_rec",
-            "log_common_params": {
-                "e": [{
-                    "data": {
-                        "page": "discoverIndexPage",
-                        "topic": "recommend"
-                    },
-                    "ab": {}
-                }],
-                "ext": {
-                    "brand": "iPhone",
-                    "device": "iPhone 11",
-                    "os": "iOS 14.7.1",
-                    "weixinver": "8.0.20",
-                    "srcver": "2.24.3",
-                    "net": "wifi",
-                    "scene": "1089"
-                },
-                "pj": "1",
-                "pf": "2",
-                "session_id": "7bcce313-b57d-4305-8d14-6ebd9a1bad29"
-            },
-            "refresh": False,
-            "token": uid_token_dict["token"],
-            "uid": uid_token_dict["uid"],
-            "proj": "ma",
-            "wx_ver": "8.0.20",
-            "code_ver": "3.62.0"
-        }
-        urllib3.disable_warnings()
-        r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
-        if 'data' not in r.text or r.status_code != 200:
-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-            return
-        elif "data" not in r.json():
-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()}\n")
-            return
-        elif "list" not in r.json()["data"]:
-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}\n")
-            return
-        elif len(r.json()['data']['list']) == 0:
-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}\n")
-            return
-        else:
-            # 视频列表数据
-            feeds = r.json()["data"]["list"]
-            for i in range(len(feeds)):
-                # 标题,表情随机加在片头、片尾,或替代句子中间的标点符号
-                if "title" in feeds[i]:
-                    befor_video_title = feeds[i]["title"].strip().replace("\n", "") \
-                        .replace("/", "").replace("\r", "").replace("#", "") \
-                        .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
-                        .replace(":", "").replace("*", "").replace("?", "") \
-                        .replace("?", "").replace('"', "").replace("<", "") \
-                        .replace(">", "").replace("|", "").replace(" ", "").replace("#表情", "").replace("#符号","").replace(
-                        '"', '').replace("'", '').replace('"', '').replace("'", '')
-
-                    expression = cls.get_expression()
-                    expression_list = expression[0]
-                    char_list = expression[1]
-                    # 随机取一个表情
-                    expression = random.choice(expression_list)
-                    # 生成标题list[表情+title, title+表情]
-                    expression_title_list = [expression + befor_video_title, befor_video_title + expression]
-                    # 从标题list中随机取一个标题
-                    title_list1 = random.choice(expression_title_list)
-                    # 生成标题:原标题+符号
-                    title_list2 = befor_video_title + random.choice(char_list)
-                    # 表情和标题组合,与标题和符号组合,汇总成待使用的标题列表
-                    title_list4 = [title_list2, title_list1]
-                    # 最终标题
-                    video_title = random.choice(title_list4)
-                else:
-                    video_title = 0
-
-                # 视频 ID
-                if "vid" in feeds[i]:
-                    video_id = feeds[i]["vid"]
-                else:
-                    video_id = 0
-
-                # 播放量
-                if "play_pv" in feeds[i]:
-                    video_play_cnt = feeds[i]["play_pv"]
-                else:
-                    video_play_cnt = 0
-
-                # 点赞量
-                if "favor" in feeds[i]:
-                    video_like_cnt = feeds[i]["favor"]["total"]
-                else:
-                    video_like_cnt = 0
-
-                # 评论数
-                if "comment_count" in feeds[i]:
-                    video_comment_cnt = feeds[i]["comment_count"]
-                else:
-                    video_comment_cnt = 0
-
-                # 分享量
-                if "share" in feeds[i]:
-                    video_share_cnt = feeds[i]["share"]
-                else:
-                    video_share_cnt = 0
-
-                # 时长
-                if "du" in feeds[i]:
-                    video_duration = int(feeds[i]["du"] / 1000)
-                else:
-                    video_duration = 0
-
-                # 宽和高
-                if "w" or "h" in feeds[i]:
-                    video_width = feeds[i]["w"]
-                    video_height = feeds[i]["h"]
-                else:
-                    video_width = 0
-                    video_height = 0
-
-                # 发布时间
-                if "t" in feeds[i]:
-                    video_send_time = feeds[i]["t"]
-                else:
-                    video_send_time = 0
-                publish_time_stamp = int(int(video_send_time) / 1000)
-                publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-
-                # 用户名 / 头像
-                if "user" in feeds[i]:
-                    user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
-                        .replace("/", "").replace("快手", "").replace(" ", "") \
-                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-                    head_url = feeds[i]["user"]["hurl"]
-                else:
-                    user_name = 0
-                    head_url = 0
-
-                # 用户 ID
-                profile_id = feeds[i]["id"]
-
-                # 用户 mid
-                profile_mid = feeds[i]["user"]["mid"]
-
-                # 视频封面
-                if "url" in feeds[i]:
-                    cover_url = feeds[i]["url"]
-                else:
-                    cover_url = 0
-
-                # 视频播放地址
-                if "v_url" in feeds[i]:
-                    video_url = feeds[i]["v_url"]
-                else:
-                    video_url = 0
-
-                video_dict = {
-                    "video_title": video_title,
-                    "video_id": video_id,
-                    "duration": video_duration,
-                    "play_cnt": video_play_cnt,
-                    "like_cnt": video_like_cnt,
-                    "comment_cnt": video_comment_cnt,
-                    "share_cnt": video_share_cnt,
-                    "user_name": user_name,
-                    "publish_time_stamp": publish_time_stamp,
-                    "publish_time_str": publish_time_str,
-                    "video_width": video_width,
-                    "video_height": video_height,
-                    "avatar_url": head_url,
-                    "profile_id": profile_id,
-                    "profile_mid": profile_mid,
-                    "cover_url": cover_url,
-                    "video_url": video_url,
-                    "session": f"xiaoniangao-hour-{int(time.time())}"
-                }
-                for k, v in video_dict.items():
-                    Common.logger(log_type, crawler).info(f"{k}:{v}")
-
-                # 过滤无效视频
-                if video_title == 0 or video_id == 0 or video_duration == 0 \
-                        or video_send_time == 0 or user_name == 0 or head_url == 0 \
-                        or cover_url == 0 or video_url == 0:
-                    Common.logger(log_type, crawler).warning("无效视频\n")
-                # 抓取基础规则过滤
-                elif cls.download_rule(video_dict) is False:
-                    Common.logger(log_type, crawler).info("不满足基础门槛规则\n")
-                elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
-                    Common.logger(log_type, crawler).info('视频已下载\n')
-                elif any(str(word) in video_dict['video_title'] for word in
-                         get_config_from_mysql(log_type=log_type,
-                                               source=crawler,
-                                               env=env,
-                                               text="filter",
-                                               action="")):
-                    Common.logger(log_type, crawler).info("视频已中过滤词\n")
-                    time.sleep(1)
-                else:
-                    # 写入飞书小时级feeds数据库表
-                    insert_sql = f""" insert into crawler_xiaoniangao_hour(profile_id,
-                    profile_mid,
-                    platform,
-                    out_video_id,
-                    video_title,
-                    user_name,
-                    cover_url,
-                    video_url,
-                    duration,
-                    publish_time,
-                    play_cnt,
-                    crawler_time_stamp,
-                    crawler_time)
-                    values({profile_id},
-                    {profile_mid},
-                    "{cls.platform}",
-                    "{video_id}",
-                    "{video_title}",
-                    "{user_name}",
-                    "{cover_url}",
-                    "{video_url}",
-                    {video_duration},
-                    "{publish_time_str}",
-                    {video_play_cnt},
-                    {int(time.time())},
-                    "{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))}"
-                    )"""
-                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-                    MysqlHelper.update_values(log_type, crawler, insert_sql, env)
-                    Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
-
-    @classmethod
-    def get_video_info(cls, log_type, crawler, p_id, p_mid, v_title, v_id):
-        # try:
-        uid_token_dict = cls.uid_token_dict
-        url = "https://kapi.xiaoniangao.cn/profile/get_profile_by_id"
-        headers = {
-            # "x-b3-traceid": cls.hour_x_b3_traceid,
-            "x-b3-traceid": '1c403a4aa72e3c',
-            # "X-Token-Id": cls.hour_x_token_id,
-            "X-Token-Id": 'ab619e96d801f1567388629260aa68ec-1202200806',
-            "uid": uid_token_dict['uid'],
-            "content-type": "application/json",
-            "Accept-Encoding": "gzip,compress,br,deflate",
-            "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
-                          ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
-                          'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
-            # "Referer": cls.hour_referer
-            "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/624/page-frame.html'
-        }
-        data = {
-            "play_src": "1",
-            "profile_id": int(p_id),
-            "profile_mid": int(p_mid),
-            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/"
-                  "!400x400r/crop/400x400/interlace/1/format/jpg",
-            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail"
-                    "/!80x80r/crop/80x80/interlace/1/format/jpg",
-            "share_width": 625,
-            "share_height": 500,
-            "no_comments": True,
-            "no_follow": True,
-            "vid": v_id,
-            "hot_l1_comment": True,
-            # "token": cls.hour_token,
-            "token": uid_token_dict['token'],
-            # "uid": cls.hour_uid,
-            "uid": uid_token_dict['uid'],
-            "proj": "ma",
-            "wx_ver": "8.0.20",
-            "code_ver": "3.62.0",
-            "log_common_params": {
-                "e": [{
-                    "data": {
-                        "page": "dynamicSharePage"
-                    }
-                }],
-                "ext": {
-                    "brand": "iPhone",
-                    "device": "iPhone 11",
-                    "os": "iOS 14.7.1",
-                    "weixinver": "8.0.20",
-                    "srcver": "2.24.3",
-                    "net": "wifi",
-                    "scene": "1089"
-                },
-                "pj": "1",
-                "pf": "2",
-                "session_id": "7bcce313-b57d-4305-8d14-6ebd9a1bad29"
-            }
-        }
-        urllib3.disable_warnings()
-        r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
-        if r.status_code != 200 or 'data' not in r.text:
-            Common.logger(log_type, crawler).warning(f"get_videoInfo:{r.text}\n")
-        else:
-            hour_play_cnt = r.json()["data"]["play_pv"]
-            hour_cover_url = r.json()["data"]["url"]
-            hour_video_url = r.json()["data"]["v_url"]
-            hour_video_duration = r.json()["data"]["du"]
-            hour_video_comment_cnt = r.json()["data"]["comment_count"]
-            hour_video_like_cnt = r.json()["data"]["favor"]["total"]
-            hour_video_share_cnt = r.json()["data"]["share"]
-            hour_video_width = r.json()["data"]["w"]
-            hour_video_height = r.json()["data"]["h"]
-            hour_video_send_time = r.json()["data"]["t"]
-            publish_time_stamp = int(int(hour_video_send_time) / 1000)
-            publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-            hour_user_name = r.json()["data"]["user"]["nick"]
-            hour_head_url = r.json()["data"]["user"]["hurl"]
-            video_info_dict = {
-                "video_id": v_id,
-                "video_title": v_title,
-                "duration": hour_video_duration,
-                "play_cnt": hour_play_cnt,
-                "like_cnt": hour_video_like_cnt,
-                "comment_cnt": hour_video_comment_cnt,
-                "share_cnt": hour_video_share_cnt,
-                "user_name": hour_user_name,
-                "publish_time_stamp": publish_time_stamp,
-                "publish_time_str": publish_time_str,
-                "video_width": hour_video_width,
-                "video_height": hour_video_height,
-                "avatar_url": hour_head_url,
-                "profile_id": p_id,
-                "profile_mid": p_mid,
-                "cover_url": hour_cover_url,
-                "video_url": hour_video_url,
-                "session": f"xiaoniangao-hour-{int(time.time())}"
-            }
-            return video_info_dict
-
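Both the feed items (`feeds[i]["t"]`) and this profile endpoint report publish time in milliseconds, hence the division by 1000 before formatting:

```python
import time

video_send_time = 1678860000000                    # ms, as returned by the API
publish_time_stamp = int(video_send_time / 1000)   # -> seconds: 1678860000
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp)))
```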
-    # Update the hourly-ranking data
-    @classmethod
-    def update_videoList(cls, log_type, crawler, strategy, oss_endpoint, env):
-        """
-        Refresh play counts for videos on the hourly ranking
-        """
-        # try:
-        befor_yesterday = (datetime.date.today() + datetime.timedelta(days=-3)).strftime("%Y-%m-%d %H:%M:%S")
-        update_time_stamp = int(time.mktime(time.strptime(befor_yesterday, "%Y-%m-%d %H:%M:%S")))
-        select_sql = f""" select * from crawler_xiaoniangao_hour where crawler_time_stamp >= {update_time_stamp} GROUP BY out_video_id """
-        update_video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env)
-        if len(update_video_list) == 0:
-            Common.logger(log_type, crawler).info("暂无需要更新的小时榜数据\n")
-            return
-        for update_video_info in update_video_list:
-            profile_id = update_video_info["profile_id"]
-            profile_mid = update_video_info["profile_mid"]
-            video_title = update_video_info["video_title"]
-            video_id = update_video_info["out_video_id"]
-            # At 10:00 / 15:00 / 20:00 (first 10 minutes of the hour), refresh the
-            # play count into the matching column, then try download & publish.
-            now = datetime.datetime.now()
-            hour_column = {10: "ten_play_cnt", 15: "fifteen_play_cnt", 20: "twenty_play_cnt"}.get(now.hour)
-            if hour_column is None or now.minute > 10:
-                continue
-            video_info_dict = cls.get_video_info(log_type=log_type,
-                                                 crawler=crawler,
-                                                 p_id=profile_id,
-                                                 p_mid=profile_mid,
-                                                 v_title=video_title,
-                                                 v_id=video_id)
-            play_cnt = video_info_dict['play_cnt']
-            Common.logger(log_type, crawler).info(f"{hour_column}:{play_cnt}")
-            update_sql = f""" update crawler_xiaoniangao_hour set {hour_column}={play_cnt} WHERE out_video_id="{video_id}"; """
-            MysqlHelper.update_values(log_type, crawler, update_sql, env)
-            cls.download_publish(log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint, env)
-
-    @classmethod
-    def download(cls, log_type, crawler, video_info_dict, strategy, oss_endpoint, env):
-        # Download the cover image
-        Common.download_method(log_type=log_type, crawler=crawler, text="cover", title=video_info_dict["video_title"],
-                               url=video_info_dict["cover_url"])
-        # Download the video
-        Common.download_method(log_type=log_type, crawler=crawler, text="video", title=video_info_dict["video_title"],
-                               url=video_info_dict["video_url"])
-        # Save video info to "./videos/{download_video_title}/info.txt"
-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_info_dict)
-
-        # Upload the video
-        Common.logger(log_type, crawler).info("开始上传视频...")
-        our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                  crawler=crawler,
-                                                  strategy=strategy,
-                                                  our_uid="hour",
-                                                  env=env,
-                                                  oss_endpoint=oss_endpoint)
-        if our_video_id is None:
-            # Upload failed: delete the local video folder and stop
-            shutil.rmtree(f"./{crawler}/videos/{video_info_dict['video_title']}")
-            return
-        if env == "dev":
-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        else:
-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        Common.logger(log_type, crawler).info("视频上传完成")
-
-        # Persist video info to the database
-        rule_dict = {
-            "duration": {"min": 40},
-            "play_cnt": {"min": 4000},
-            "publish_day": {"min": 10}
-        }
-
-        insert_sql = f""" insert into crawler_video(video_id,
-                                                        out_user_id,
-                                                        platform,
-                                                        strategy,
-                                                        out_video_id,
-                                                        video_title,
-                                                        cover_url,
-                                                        video_url,
-                                                        duration,
-                                                        publish_time,
-                                                        play_cnt,
-                                                        crawler_rule,
-                                                        width,
-                                                        height)
-                                                        values({our_video_id},
-                                                        "{video_info_dict['profile_id']}",
-                                                        "{cls.platform}",
-                                                        "小时榜爬虫策略",
-                                                        "{video_info_dict['video_id']}",
-                                                        "{video_info_dict['video_title']}",
-                                                        "{video_info_dict['cover_url']}",
-                                                        "{video_info_dict['video_url']}",
-                                                        {int(video_info_dict['duration'])},
-                                                        "{video_info_dict['publish_time_str']}",
-                                                        {int(video_info_dict['play_cnt'])},
-                                                        '{json.dumps(rule_dict)}',
-                                                        {int(video_info_dict['video_width'])},
-                                                        {int(video_info_dict['video_height'])}) """
-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-        MysqlHelper.update_values(log_type, crawler, insert_sql, env)
-        Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
-
-        # Write the video to Feishu
-        Feishu.insert_columns(log_type, crawler, "yatRv2", "ROWS", 1, 2)
-        # Video-ID worksheet: write the data into the first row
-        upload_time = int(time.time())
-        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                   "小时级上升榜",
-                   str(video_info_dict['video_id']),
-                   str(video_info_dict['video_title']),
-                   our_video_link,
-                   video_info_dict['play_cnt'],
-                   video_info_dict['comment_cnt'],
-                   video_info_dict['like_cnt'],
-                   video_info_dict['share_cnt'],
-                   video_info_dict['duration'],
-                   f"{video_info_dict['video_width']}*{video_info_dict['video_height']}",
-                   str(video_info_dict['publish_time_str'].replace("-", "/")),
-                   str(video_info_dict['user_name']),
-                   str(video_info_dict['profile_id']),
-                   str(video_info_dict['profile_mid']),
-                   str(video_info_dict['avatar_url']),
-                   str(video_info_dict['cover_url']),
-                   str(video_info_dict['video_url'])]]
-        time.sleep(1)
-        Feishu.update_values(log_type, crawler, "yatRv2", "F2:Z2", values)
-        Common.logger(log_type, crawler).info('视频信息写入飞书成功\n')
-
-    # Download / upload
-    @classmethod
-    def download_publish(cls, log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint, env):
-        if cls.repeat_video(log_type, crawler, video_info_dict["video_id"], env) != 0:
-            Common.logger(log_type, crawler).info('视频已下载\n')
-        # Play count >= 30000: download immediately
-        elif int(video_info_dict["play_cnt"]) >= 30000:
-            Common.logger(log_type, crawler).info(
-                f"播放量:{video_info_dict['play_cnt']} >= 30000,满足下载规则,开始下载视频")
-            cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
-
-        # Rising-rank rule: any single slot gained >= 3000 plays, or any two slots each gained >= 1000
-        elif int(update_video_info['ten_play_cnt']) >= 3000 or int(
-                update_video_info['fifteen_play_cnt']) >= 3000 or int(update_video_info['twenty_play_cnt']) >= 3000:
-            Common.logger(log_type, crawler).info(
-                f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
-            Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
-            cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
-
-        elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['fifteen_play_cnt']) >= 1000:
-            Common.logger(log_type, crawler).info(
-                f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
-            Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
-            cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
-
-        elif int(update_video_info['fifteen_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
-            Common.logger(log_type, crawler).info(
-                f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
-            Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
-            cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
-
-        elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
-            Common.logger(log_type, crawler).info(
-                f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
-            Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
-            cls.download(log_type, crawler, video_info_dict, strategy, oss_endpoint, env)
-
-        else:
-            Common.logger(log_type, crawler).info("上升量不满足下载规则")
-
-
-if __name__ == "__main__":
-    # print(XiaoniangaoHour.get_expression())
-    # print(XiaoniangaoHour.get_uid_token())
-    # XiaoniangaoHour.get_videoList("test", "xiaoniangao", "dev")
-    # XiaoniangaoHour.update_videoList("test", "xiaoniangao", "小时榜爬虫策略", "out", "dev")
-    # befor_yesterday = (datetime.date.today() + datetime.timedelta(days=-3)).strftime("%Y-%m-%d %H:%M:%S")
-    # update_time_stamp = int(time.mktime(time.strptime(befor_yesterday, "%Y-%m-%d %H:%M:%S")))
-    # print(update_time_stamp)
-    # print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))))
-    print(XiaoniangaoHour.uid_token_dict)
-    pass
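
The download gate in `download_publish` above reduces to a small predicate: download straight away at 30000+ total plays, on a single-slot gain of 3000+, or when any two of the three daily slots each gained 1000+. A minimal sketch of the same thresholds (function and parameter names are mine; the real method also dedupes against `crawler_video` first):

```python
def should_download(play_cnt: int, ten: int, fifteen: int, twenty: int) -> bool:
    # direct download: total play count already high enough
    if play_cnt >= 30000:
        return True
    # any single slot (10:00 / 15:00 / 20:00) gained >= 3000 plays
    if max(ten, fifteen, twenty) >= 3000:
        return True
    # any two slots each gained >= 1000 plays
    return sum(1 for gain in (ten, fifteen, twenty) if gain >= 1000) >= 2
```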

+ 0 - 43
xiaoniangao/xiaoniangao_main/run_xiaoniangao_author_scheduling.py

@@ -1,43 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/4/20
-import argparse
-import os
-import sys
-sys.path.append(os.getcwd())
-from common.public import task_fun
-from common.common import Common
-from common.scheduling_db import MysqlHelper
-from xiaoniangao.xiaoniangao_author.xiaoniangao_author_scheduling import XiaoniangaoAuthorScheduling
-
-
-def main(log_type, crawler, task, env):
-    task_info = task_fun(task)
-    task_dict = task_info['task_dict']
-    rule_dict = task_info['rule_dict']
-    task_id = task_dict['task_id']
-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
-    Common.logger(log_type, crawler).info(f"调度任务:\n{task_dict}")
-    Common.logger(log_type, crawler).info(f"抓取规则:\n{rule_dict}")
-    Common.logger(log_type, crawler).info(f"用户列表:\n{user_list}")
-    Common.logger(log_type, crawler).info('开始抓取 小年糕 定向榜\n')
-    XiaoniangaoAuthorScheduling.get_author_videos(log_type=log_type,
-                                                  crawler=crawler,
-                                                  user_list=user_list,
-                                                  rule_dict=rule_dict,
-                                                  env=env)
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('抓取完一轮\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # build the argument parser
-    parser.add_argument('--log_type', type=str)  # declare each argument (and, where needed, its type)
-    parser.add_argument('--crawler')
-    parser.add_argument('--task')
-    parser.add_argument('--env')
-    args = parser.parse_args()  # values come from the command line
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         task=args.task,
-         env=args.env)
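
For reference, a runner like the one above would be launched the same way as the other entry points in this repo; a hypothetical invocation (the exact `--task` payload format is defined by `common.public.task_fun` and is not visible in this diff):

```commandline
python ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_author_scheduling.py --log_type="author" --crawler="xiaoniangao" --task='...' --env="prod"
```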

+ 0 - 35
xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py

@@ -1,35 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/3/13
-import argparse
-import os
-import sys
-sys.path.append(os.getcwd())
-from common.common import Common
-from xiaoniangao.xiaoniangao_follow.xiaoniangao_follow import XiaoniangaoFollow
-
-
-def main(log_type, crawler, env):
-    if env == "dev":
-        oss_endpoint = "out"
-    else:
-        oss_endpoint = "inner"
-    Common.logger(log_type, crawler).info('开始抓取 小年糕 定向榜\n')
-    XiaoniangaoFollow.get_follow_videos(log_type=log_type,
-                                        crawler=crawler,
-                                        strategy="定向爬虫策略",
-                                        oss_endpoint=oss_endpoint,
-                                        env=env)
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('抓取完一轮\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # build the argument parser
-    parser.add_argument('--log_type', type=str)  # declare each argument (and, where needed, its type)
-    parser.add_argument('--crawler')
-    parser.add_argument('--env')
-    args = parser.parse_args()  # values come from the command line
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         env=args.env)
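
The `env == "dev"` → `"out"` / otherwise `"inner"` mapping above is repeated in several runners; a tiny shared helper would keep them in sync. A sketch (the helper name is mine):

```python
def get_oss_endpoint(env: str) -> str:
    # dev publishes through the external OSS endpoint; everything else stays internal
    return "out" if env == "dev" else "inner"
```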

+ 0 - 55
xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py

@@ -1,55 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/3/15
-import argparse
-import datetime
-import os
-import sys
-sys.path.append(os.getcwd())
-from common.common import Common
-from xiaoniangao.xiaoniangao_hour.xiaoniangao_hour import XiaoniangaoHour
-
-
-def main(log_type, crawler, env):
-    if env == "dev":
-        oss_endpoint = "out"
-    else:
-        oss_endpoint = "inner"
-    # Fetch videos that match the rules and write them into the hourly feeds table
-    XiaoniangaoHour.get_videoList(log_type, crawler, env)
-    # During the first 10 minutes of 10:00 / 15:00 / 20:00, refresh/download the rising rank
-    now = datetime.datetime.now()
-    if now.hour in (10, 15, 20) and now.minute <= 10:
-        Common.logger(log_type, crawler).info("开始更新/下载上升榜")
-        XiaoniangaoHour.update_videoList(log_type=log_type,
-                                         crawler=crawler,
-                                         strategy="小时榜爬虫策略",
-                                         oss_endpoint=oss_endpoint,
-                                         env=env)
-    Common.del_logs(log_type, crawler)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # build the argument parser
-    parser.add_argument('--log_type', type=str)  # declare each argument (and, where needed, its type)
-    parser.add_argument('--crawler')
-    parser.add_argument('--env')
-    args = parser.parse_args()  # values come from the command line
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         env=args.env)
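
The trigger window used above reappears in the scheduling runner below, so it is worth sanity-checking in isolation; a minimal sketch (names are my own):

```python
import datetime

TRIGGER_HOURS = (10, 15, 20)

def in_update_window(now: datetime.datetime) -> bool:
    # True only during the first 10 minutes of 10:00, 15:00 and 20:00
    return now.hour in TRIGGER_HOURS and now.minute <= 10

assert in_update_window(datetime.datetime(2023, 3, 15, 15, 5))
assert not in_update_window(datetime.datetime(2023, 3, 15, 15, 30))
```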

+ 0 - 74
xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour_scheduling.py

@@ -1,74 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/3/15
-import argparse
-import datetime
-import os
-import random
-import sys
-sys.path.append(os.getcwd())
-from common.scheduling_db import MysqlHelper
-from common.common import Common
-from common.public import task_fun
-from xiaoniangao.xiaoniangao_hour.xiaoniangao_hour_scheduling import XiaoniangaoHourScheduling
-
-
-def main(log_type, crawler, task, env):
-    task_info = task_fun(task)
-    task_dict = task_info['task_dict']
-    rule_dict = task_info['rule_dict']
-    task_id = task_dict['task_id']
-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
-    our_uid_list = [user["uid"] for user in user_list]
-    our_uid = random.choice(our_uid_list)
-    Common.logger(log_type, crawler).info(f"调度任务:\n{task_dict}")
-    Common.logger(log_type, crawler).info(f"抓取规则:\n{rule_dict}")
-    Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["task_name"]}\n')
-    # Fetch videos that match the rules and write them into the hourly feeds table
-    for i in range(1, 101):
-        try:
-            Common.logger(log_type, crawler).info(f"正在抓取第{i}页")
-            XiaoniangaoHourScheduling.get_videoList(log_type, crawler, rule_dict, env)
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f"抓取第{i}页时异常:{e}\n")
-    now = datetime.datetime.now()
-    # During the first 10 minutes of 10:00 / 15:00 / 20:00, refresh/download the rising rank
-    if now.hour in (10, 15, 20) and now.minute <= 10:
-        Common.logger(log_type, crawler).info("开始更新/下载上升榜")
-        XiaoniangaoHourScheduling.update_videoList(log_type=log_type,
-                                                   crawler=crawler,
-                                                   rule_dict=rule_dict,
-                                                   our_uid=our_uid,
-                                                   env=env)
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info("抓取完一轮\n")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # build the argument parser
-    parser.add_argument('--log_type', type=str)  # declare each argument (and, where needed, its type)
-    parser.add_argument('--crawler')
-    parser.add_argument('--task')
-    parser.add_argument('--env')
-    args = parser.parse_args()  # values come from the command line
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         task=args.task,
-         env=args.env)
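
One caveat for this runner and the play-scheduling runner later in this commit: `random.choice` raises `IndexError` on an empty sequence, so a task with no bound users would crash `main` before anything useful is logged. A defensive sketch (assuming the same `user_list` row shape):

```python
import random
from typing import Optional

def pick_our_uid(user_list: list) -> Optional[int]:
    # guard against tasks that have no users bound to them
    uids = [user["uid"] for user in user_list]
    return random.choice(uids) if uids else None
```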

+ 0 - 36
xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py

@@ -1,36 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/3/16
-import argparse
-import os
-import sys
-sys.path.append(os.getcwd())
-from common.common import Common
-from xiaoniangao.xiaoniangao_play.xiaoniangao_play import XiaoniangaoPlay
-
-
-class Main:
-    @classmethod
-    def main(cls, log_type, crawler, env):
-        if env == "dev":
-            oss_endpoint = "out"
-        else:
-            oss_endpoint = "inner"
-        for i in range(100):
-            Common.logger(log_type, crawler).info(f'正在抓取小年糕播放量榜,第{i+1}页\n')
-            XiaoniangaoPlay.get_videoList(log_type=log_type,
-                                          crawler=crawler,
-                                          strategy="播放量榜爬虫策略",
-                                          oss_endpoint=oss_endpoint,
-                                          env=env)
-        Common.del_logs(log_type, crawler)
-        Common.logger(log_type, crawler).info('抓取完一轮\n')
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()  # build the argument parser
-    parser.add_argument('--log_type', type=str)  # declare each argument (and, where needed, its type)
-    parser.add_argument('--crawler')
-    parser.add_argument('--env')
-    args = parser.parse_args()  # values come from the command line
-    Main.main(log_type=args.log_type, crawler=args.crawler, env=args.env)

+ 0 - 48
xiaoniangao/xiaoniangao_main/run_xiaoniangao_play_scheduling.py

@@ -1,48 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/4/21
-import argparse
-import os
-import random
-import sys
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.public import task_fun
-from common.scheduling_db import MysqlHelper
-from xiaoniangao.xiaoniangao_play.xiaoniangao_play_scheduling import XiaoniangaoplayScheduling
-
-
-def main(log_type, crawler, task, env):
-    task_info = task_fun(task)
-    task_dict = task_info['task_dict']
-    rule_dict = task_info['rule_dict']
-    task_id = task_dict['task_id']
-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
-    our_uid_list = [user["uid"] for user in user_list]
-    our_uid = random.choice(our_uid_list)
-    Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
-    Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
-    # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-    Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["task_name"]}\n')
-    XiaoniangaoplayScheduling.get_videoList(log_type=log_type,
-                                            crawler=crawler,
-                                            rule_dict=rule_dict,
-                                            our_uid=our_uid,
-                                            env=env)
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('抓取任务结束\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # build the argument parser
-    parser.add_argument('--log_type', default='recommend')  # declare each argument and its default
-    parser.add_argument('--crawler', default='xiaoniangao')
-    parser.add_argument('--task')
-    parser.add_argument('--env', default='prod')
-    args = parser.parse_args()  # values come from the command line
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         task=args.task,
-         env=args.env)
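
All of these run_*.py entry points repeat the same argparse boilerplate; factoring it out once would also stop mismatched defaults from being copy-pasted between crawlers. A sketch of such a helper (name and location are my own):

```python
import argparse

def parse_runner_args(with_task: bool = False) -> argparse.Namespace:
    # the shared CLI surface of the runner scripts in this commit
    parser = argparse.ArgumentParser()
    parser.add_argument('--log_type', type=str)
    parser.add_argument('--crawler')
    if with_task:
        parser.add_argument('--task')
    parser.add_argument('--env')
    return parser.parse_args()
```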

+ 0 - 430
xiaoniangao/xiaoniangao_play/xiaoniangao_play.py

@@ -1,430 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/3/16
-import json
-import os
-import random
-import shutil
-import sys
-import time
-import requests
-import urllib3
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.feishu import Feishu
-from common.publish import Publish
-from common.public import get_config_from_mysql
-from common.scheduling_db import MysqlHelper
-proxies = {"http": None, "https": None}
-
-
-class XiaoniangaoPlay:
-    platform = "小年糕"
-
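-    # Device identity forged at import time: a UUID-shaped uid (8-4-4-4-12 chars) and a
-    # 32-char token, both sampled from [a-z0-9]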
-    words = "abcdefghijklmnopqrstuvwxyz0123456789"
-    uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
-    token = "".join(random.sample(words, 32))
-    uid_token_dict = {
-        "uid": uid,
-        "token": token
-    }
-
-    # Generate a uid / token pair
-    @classmethod
-    def get_uid_token(cls):
-        words = "abcdefghijklmnopqrstuvwxyz0123456789"
-        uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
-        token = "".join(random.sample(words, 32))
-        uid_token_dict = {
-            "uid": uid,
-            "token": token
-        }
-        return uid_token_dict
-
-    # Baseline threshold rule
-    @classmethod
-    def download_rule(cls, video_dict):
-        """
-        Baseline rule for downloading a video.
-        :param video_dict: video info, as a dict
-        :return: True if the rule is satisfied, otherwise False
-        """
-        # duration >= 40s
-        if int(float(video_dict['duration'])) < 40:
-            return False
-        # width or height present (>= 0)
-        if int(video_dict['video_width']) < 0 and int(video_dict['video_height']) < 0:
-            return False
-        # play count >= 20000
-        if int(video_dict['play_cnt']) < 20000:
-            return False
-        # like count >= 0
-        if int(video_dict['like_cnt']) < 0:
-            return False
-        # share count >= 0
-        if int(video_dict['share_cnt']) < 0:
-            return False
-        # published within the last 60 days
-        if int(time.time()) - int(video_dict['publish_time_stamp']) > 3600 * 24 * 60:
-            return False
-        return True
-
-    # Emoji and punctuation pools for title decoration
-    @classmethod
-    def get_expression(cls):
-        # emoji pool
-        expression_list = ['📍', '⭕️', '🔥', '📣', '🎈', '⚡', '🔔', '🚩', '💢', '💎', '👉', '💓', '❗️', '🔴', '🔺', '♦️', '♥️', '👉', '👈', '🏆', '❤️\u200d🔥']
-        # punctuation pool
-        char_list = ['...', '~~']
-        return expression_list, char_list
-
-    # Fetch the recommend feed
-    @classmethod
-    def get_videoList(cls, log_type, crawler, strategy, oss_endpoint, env):
-        uid_token_dict = cls.uid_token_dict
-        url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
-        headers = {
-            "x-b3-traceid": '1dc0a6d0929a2b',
-            "X-Token-Id": 'ae99a4953804085ebb0ae36fa138031d-1146052582',
-            "uid": uid_token_dict['uid'],
-            "content-type": "application/json",
-            "Accept-Encoding": "gzip,compress,br,deflate",
-            "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
-                          ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
-                          'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
-            "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/620/page-frame.html'
-        }
-        data = {
-            "log_params": {
-                "page": "discover_rec",
-                "common": {
-                    "brand": "iPhone",
-                    "device": "iPhone 11",
-                    "os": "iOS 14.7.1",
-                    "weixinver": "8.0.20",
-                    "srcver": "2.24.2",
-                    "net": "wifi",
-                    "scene": 1089
-                }
-            },
-            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
-            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
-            "share_width": 625,
-            "share_height": 500,
-            "ext": {
-                "fmid": 0,
-                "items": {}
-            },
-            "app": "xng",
-            "rec_scene": "discover_rec",
-            "log_common_params": {
-                "e": [{
-                    "data": {
-                        "page": "discoverIndexPage",
-                        "topic": "recommend"
-                    },
-                    "ab": {}
-                }],
-                "ext": {
-                    "brand": "iPhone",
-                    "device": "iPhone 11",
-                    "os": "iOS 14.7.1",
-                    "weixinver": "8.0.20",
-                    "srcver": "2.24.3",
-                    "net": "wifi",
-                    "scene": "1089"
-                },
-                "pj": "1",
-                "pf": "2",
-                "session_id": "7bcce313-b57d-4305-8d14-6ebd9a1bad29"
-            },
-            "refresh": False,
-            "token": uid_token_dict['token'],
-            "uid": uid_token_dict['uid'],
-            "proj": "ma",
-            "wx_ver": "8.0.20",
-            "code_ver": "3.62.0"
-        }
-        urllib3.disable_warnings()
-        r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
-        if "data" not in r.text or r.status_code != 200:
-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}")
-            return
-        elif "data" not in r.json():
-            Common.logger(log_type, crawler).info(f"get_videoList:{r.json()}")
-            return
-        elif "list" not in r.json()["data"]:
-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}")
-            return
-        elif len(r.json()["data"]["list"]) == 0:
-            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}")
-            return
-        else:
-            # video list payload
-            feeds = r.json()["data"]["list"]
-            for i in range(len(feeds)):
-                # Title: randomly prepend/append an emoji, or append a punctuation mark
-                if "title" in feeds[i]:
-                    raw_video_title = feeds[i]["title"].strip().replace("\n", "") \
-                        .replace("/", "").replace("\r", "").replace("#", "") \
-                        .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
-                        .replace(":", "").replace("*", "").replace("?", "") \
-                        .replace("?", "").replace('"', "").replace("<", "") \
-                        .replace(">", "").replace("|", "").replace(" ", "").replace("#表情", "").replace("#符号", "").replace("'", '')
-
-                    expression_list, char_list = cls.get_expression()
-                    # one random emoji
-                    emoji = random.choice(expression_list)
-                    # emoji variant: emoji + title, or title + emoji
-                    title_with_emoji = random.choice([emoji + raw_video_title, raw_video_title + emoji])
-                    # punctuation variant: title + punctuation
-                    title_with_char = raw_video_title + random.choice(char_list)
-                    # final title: one of the two decorated variants
-                    video_title = random.choice([title_with_char, title_with_emoji])
-                else:
-                    video_title = 0
-
-                # video ID
-                if "vid" in feeds[i]:
-                    video_id = feeds[i]["vid"]
-                else:
-                    video_id = 0
-
-                # play count
-                if "play_pv" in feeds[i]:
-                    video_play_cnt = feeds[i]["play_pv"]
-                else:
-                    video_play_cnt = 0
-
-                # comment count
-                if "comment_count" in feeds[i]:
-                    video_comment_cnt = feeds[i]["comment_count"]
-                else:
-                    video_comment_cnt = 0
-
-                # like count
-                if "favor" in feeds[i]:
-                    video_like_cnt = feeds[i]["favor"]["total"]
-                else:
-                    video_like_cnt = 0
-
-                # share count
-                if "share" in feeds[i]:
-                    video_share_cnt = feeds[i]["share"]
-                else:
-                    video_share_cnt = 0
-
-                # duration (ms -> s)
-                if "du" in feeds[i]:
-                    video_duration = int(feeds[i]["du"] / 1000)
-                else:
-                    video_duration = 0
-
-                # width and height
-                if "w" in feeds[i] and "h" in feeds[i]:
-                    video_width = feeds[i]["w"]
-                    video_height = feeds[i]["h"]
-                else:
-                    video_width = 0
-                    video_height = 0
-
-                # publish time
-                if "t" in feeds[i]:
-                    video_send_time = feeds[i]["t"]
-                else:
-                    video_send_time = 0
-                publish_time_stamp = int(int(video_send_time) / 1000)
-                publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-
-                # user name / avatar
-                if "user" in feeds[i]:
-                    user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
-                        .replace("/", "").replace("快手", "").replace(" ", "") \
-                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-                    head_url = feeds[i]["user"]["hurl"]
-                else:
-                    user_name = 0
-                    head_url = 0
-
-                # user ID
-                profile_id = feeds[i]["id"]
-
-                # user mid
-                profile_mid = feeds[i]["user"]["mid"]
-
-                # cover URL
-                if "url" in feeds[i]:
-                    cover_url = feeds[i]["url"]
-                else:
-                    cover_url = 0
-
-                # video playback URL
-                if "v_url" in feeds[i]:
-                    video_url = feeds[i]["v_url"]
-                else:
-                    video_url = 0
-
-                video_dict = {
-                    "video_title": video_title,
-                    "video_id": video_id,
-                    "duration": video_duration,
-                    "play_cnt": video_play_cnt,
-                    "like_cnt": video_like_cnt,
-                    "comment_cnt": video_comment_cnt,
-                    "share_cnt": video_share_cnt,
-                    "user_name": user_name,
-                    "publish_time_stamp": publish_time_stamp,
-                    "publish_time_str": publish_time_str,
-                    "video_width": video_width,
-                    "video_height": video_height,
-                    "avatar_url": head_url,
-                    "profile_id": profile_id,
-                    "profile_mid": profile_mid,
-                    "cover_url": cover_url,
-                    "video_url": video_url,
-                    "session": f"xiaoniangao-play-{int(time.time())}"
-
-                }
-                for k, v in video_dict.items():
-                    Common.logger(log_type, crawler).info(f"{k}:{v}")
-
-                cls.download_publish(log_type=log_type,
-                                     crawler=crawler,
-                                     video_dict=video_dict,
-                                     strategy=strategy,
-                                     oss_endpoint=oss_endpoint,
-                                     env=env)
-
-    @classmethod
-    def repeat_video(cls, log_type, crawler, video_id, env):
-        sql = f""" select * from crawler_video where platform="小年糕" and out_video_id="{video_id}"; """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
-        return len(repeat_video)
-
-    @classmethod
-    def download_publish(cls, log_type, crawler, video_dict, strategy, oss_endpoint, env):
-        # Filter out invalid videos
-        if video_dict["video_id"] == 0 \
-                or video_dict["video_url"] == 0 \
-                or video_dict["cover_url"] == 0:
-            Common.logger(log_type, crawler).warning("无效视频\n")
-        # Download rule
-        elif cls.download_rule(video_dict) is False:
-            Common.logger(log_type, crawler).info("不满足抓取规则\n")
-        # Dedupe
-        elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
-            Common.logger(log_type, crawler).info("视频已下载\n")
-        # Filter words
-        elif any(str(word) in video_dict['video_title'] for word in
-                 get_config_from_mysql(log_type=log_type,
-                                       source=crawler,
-                                       env=env,
-                                       text="filter",
-                                       action="")):
-            Common.logger(log_type, crawler).info("视频已中过滤词\n")
-        else:
-            # Download the cover image
-            Common.download_method(log_type=log_type, crawler=crawler, text="cover", title=video_dict["video_title"], url=video_dict["cover_url"])
-            # Download the video
-            Common.download_method(log_type=log_type, crawler=crawler, text="video", title=video_dict["video_title"], url=video_dict["video_url"])
-            # Save video info to "./videos/{download_video_title}/info.txt"
-            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-            # Upload the video
-            Common.logger(log_type, crawler).info("开始上传视频...")
-            our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                      crawler=crawler,
-                                                      strategy=strategy,
-                                                      our_uid="play",
-                                                      env=env,
-                                                      oss_endpoint=oss_endpoint)
-            if our_video_id is None:
-                # Upload failed: delete the local video folder and stop
-                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-                return
-            if env == "dev":
-                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-            else:
-                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-            Common.logger(log_type, crawler).info("视频上传完成")
-
-            # Persist video info to the database
-            rule_dict = {
-                "duration": {"min": 40},
-                "play_cnt": {"min": 80000},
-                "min_publish_day": {"min": 60}
-            }
-
-            insert_sql = f""" insert into crawler_video(video_id,
-                                                        out_user_id,
-                                                        platform,
-                                                        strategy,
-                                                        out_video_id,
-                                                        video_title,
-                                                        cover_url,
-                                                        video_url,
-                                                        duration,
-                                                        publish_time,
-                                                        play_cnt,
-                                                        crawler_rule,
-                                                        width,
-                                                        height)
-                                                        values({our_video_id},
-                                                        "{video_dict['profile_id']}",
-                                                        "{cls.platform}",
-                                                        "播放量榜爬虫策略",
-                                                        "{video_dict['video_id']}",
-                                                        "{video_dict['video_title']}",
-                                                        "{video_dict['cover_url']}",
-                                                        "{video_dict['video_url']}",
-                                                        {int(video_dict['duration'])},
-                                                        "{video_dict['publish_time_str']}",
-                                                        {int(video_dict['play_cnt'])},
-                                                        '{json.dumps(rule_dict)}',
-                                                        {int(video_dict['video_width'])},
-                                                        {int(video_dict['video_height'])}) """
-            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-            MysqlHelper.update_values(log_type, crawler, insert_sql, env)
-            Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
-
-            # Write the video to Feishu
-            Feishu.insert_columns(log_type, crawler, "c85k1C", "ROWS", 1, 2)
-            # Video-ID worksheet: write the data into the first row
-            upload_time = int(time.time())
-            values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                       "播放量榜爬虫策略",
-                       str(video_dict['video_id']),
-                       str(video_dict['video_title']),
-                       our_video_link,
-                       video_dict['play_cnt'],
-                       video_dict['comment_cnt'],
-                       video_dict['like_cnt'],
-                       video_dict['share_cnt'],
-                       video_dict['duration'],
-                       f"{video_dict['video_width']}*{video_dict['video_height']}",
-                       str(video_dict['publish_time_str']),
-                       str(video_dict['user_name']),
-                       str(video_dict['profile_id']),
-                       str(video_dict['profile_mid']),
-                       str(video_dict['avatar_url']),
-                       str(video_dict['cover_url']),
-                       str(video_dict['video_url'])]]
-            time.sleep(1)
-            Feishu.update_values(log_type, crawler, "c85k1C", "F2:Z2", values)
-            Common.logger(log_type, crawler).info('视频信息写入飞书成功\n')
-
-
-if __name__ == '__main__':
-    XiaoniangaoPlay.get_videoList("play", "xiaoniangao", "播放量榜爬虫策略", "out", "dev")
-
-    pass
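
A note on the SQL above: both `insert_sql` blocks splice titles and URLs into the statement with f-strings, which breaks on values containing quotes and is injection-prone. If `MysqlHelper` can forward parameters to the underlying driver, standard DB-API binding is safer; a sketch under that assumption (whether `MysqlHelper` exposes an `execute(sql, params)` path is not shown in this diff):

```python
def insert_video(cursor, our_video_id, video_dict: dict) -> None:
    # DB-API parameter binding (e.g. pymysql): the driver escapes each value,
    # so a title containing quotes can no longer break the statement
    sql = ("insert into crawler_video(video_id, platform, strategy, out_video_id, video_title) "
           "values (%s, %s, %s, %s, %s)")
    cursor.execute(sql, (our_video_id, "小年糕", "播放量榜爬虫策略",
                         video_dict["video_id"], video_dict["video_title"]))
```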

+ 0 - 3
xigua/xigua_follow/__init__.py

@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/17
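
The file below builds the same retrying `requests` session in several places (an `HTTPAdapter(max_retries=3)` mounted for both schemes); a shared factory would remove that repetition. A sketch:

```python
import requests
from requests.adapters import HTTPAdapter

def make_retry_session(max_retries: int = 3) -> requests.Session:
    # retry transient connection failures up to max_retries times on both schemes
    session = requests.Session()
    adapter = HTTPAdapter(max_retries=max_retries)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
```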

+ 0 - 1039
xigua/xigua_follow/xigua_follow.py

@@ -1,1039 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/17
-import base64
-import json
-import os
-import random
-import shutil
-import string
-import sys
-import time
-from hashlib import md5
-
-import requests
-import urllib3
-from requests.adapters import HTTPAdapter
-
-# from selenium.webdriver import DesiredCapabilities
-# from selenium.webdriver.chrome.service import Service
-# from selenium.webdriver.common.by import By
-# from selenium import webdriver
-from lxml import etree
-
-sys.path.append(os.getcwd())
-from common.db import MysqlHelper
-from common.getuser import getUser
-from common.common import Common
-from common.feishu import Feishu
-from common.publish import Publish
-from common.public import get_user_from_mysql, random_title, get_config_from_mysql
-
-
-class Follow:
-    # pagination offset for a user's profile videos
-    offset = 0
-
-    platform = "西瓜视频"
-    tag = "西瓜视频爬虫,定向爬虫策略"
-
-    @classmethod
-    def get_rule(cls, log_type, crawler):
-        try:
-            while True:
-                rule_sheet = Feishu.get_values_batch(log_type, crawler, "4kxd31")
-                if rule_sheet is None:
-                    Common.logger(log_type, crawler).warning("rule_sheet is None! 10秒后重新获取")
-                    time.sleep(10)
-                    continue
-                rule_dict = {
-                    "play_cnt": int(rule_sheet[1][2]),
-                    "comment_cnt": int(rule_sheet[2][2]),
-                    "like_cnt": int(rule_sheet[3][2]),
-                    "duration": int(rule_sheet[4][2]),
-                    "publish_time": int(rule_sheet[5][2]),
-                    "video_width": int(rule_sheet[6][2]),
-                    "video_height": int(rule_sheet[7][2]),
-                }
-                return rule_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_rule:{e}\n")
-
-    # Download rule
-    @classmethod
-    def download_rule(cls, video_info_dict, rule_dict):
-        return (video_info_dict['play_cnt'] >= rule_dict['play_cnt']
-                and video_info_dict['comment_cnt'] >= rule_dict['comment_cnt']
-                and video_info_dict['like_cnt'] >= rule_dict['like_cnt']
-                and video_info_dict['duration'] >= rule_dict['duration']
-                and (video_info_dict['video_width'] >= rule_dict['video_width']
-                     or video_info_dict['video_height'] >= rule_dict['video_height']))
-
-    # Filter-word list
-    @classmethod
-    def filter_words(cls, log_type, crawler):
-        try:
-            while True:
-                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'KGB4Hc')
-                if filter_words_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
-                    time.sleep(10)
-                    continue
-                filter_words_list = []
-                for x in filter_words_sheet:
-                    for y in x:
-                        if y is not None:
-                            filter_words_list.append(y)
-                return filter_words_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
-
-    @classmethod
-    def get_out_user_info(cls, log_type, crawler, out_uid):
-        try:
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
-                'referer': f'https://www.ixigua.com/home/{out_uid}',
-                'Cookie': f'ixigua-a-s=1; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; __ac_signature={cls.random_signature()}; MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; s_v_web_id=verify_lef4i99x_32SosrdH_Qrtk_4LJn_8S7q_fhu16xe3s8ZV; tt_scid=QLJjPuHf6wxVqu6IIq6gHiJXQpVrCwrdhjH2zpm7-E3ZniE1RXBcP6M8b41FJOdo41e1; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1677047013%7C5866a444e5ae10a9df8c11551db75010fb77b657f214ccf84e503fae8d313d09; msToken=PerXJcDdIsZ6zXkGITsftXX4mDaVaW21GuqtzSVdctH46oXXT2GcELIs9f0XW2hunRzP6KVHLZaYElRvNYflLKUXih7lC27XKxs3HjdZiXPK9NQaoKbLfA==; ixigua-a-s=1', }
-            url = f"https://www.ixigua.com/home/{out_uid}"
-            urllib3.disable_warnings()
-            s = requests.session()
-            # retry up to 3 times on connection errors
-            s.mount('http://', HTTPAdapter(max_retries=3))
-            s.mount('https://', HTTPAdapter(max_retries=3))
-            response = s.get(url=url, headers=headers, proxies=Common.tunnel_proxies(), verify=False, timeout=5).text
-            html = etree.HTML(response)
-            out_follow_str = html.xpath('//div[@class="userDetailV3__header__detail2"]/*[1]/span')[0].text.encode(
-                'raw_unicode_escape').decode()
-            out_fans_str = html.xpath('//div[@class="userDetailV3__header__detail2"]/*[2]/span')[0].text.encode(
-                'raw_unicode_escape').decode()
-            out_like_str = html.xpath('//div[@class="userDetailV3__header__detail2"]/*[3]/span')[0].text.encode(
-                'raw_unicode_escape').decode()
-            out_avatar_url = f"""https:{html.xpath('//span[@class="component-avatar__inner"]//img/@src')[0]}"""
-            if "万" in out_follow_str:
-                out_follow = int(float(out_follow_str.split("万")[0]) * 10000)
-            else:
-                out_follow = int(out_follow_str.replace(",", ""))
-            if "万" in out_fans_str:
-                out_fans = int(float(out_fans_str.split("万")[0]) * 10000)
-            else:
-                out_fans = int(out_fans_str.replace(",", ""))
-            if "万" in out_like_str:
-                out_like = int(float(out_like_str.split("万")[0]) * 10000)
-            else:
-                out_like = int(out_like_str.replace(",", ""))
-            out_user_dict = {
-                "out_follow": out_follow,
-                "out_fans": out_fans,
-                "out_like": out_like,
-                "out_avatar_url": out_avatar_url,
-            }
-            # for k, v in out_user_dict.items():
-            #     print(f"{k}:{v}")
-            return out_user_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_out_user_info:{e}\n")
-
-    # Fetch user info (as dicts). Note: some user_id values are int, others str
-    @classmethod
-    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
-        try:
-            while True:
-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
-                if user_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
-                    time.sleep(10)
-                    continue
-                our_user_list = []
-                for i in range(1, len(user_sheet)):
-                    # for i in range(428, len(user_sheet)):
-                    out_uid = user_sheet[i][2]
-                    user_name = user_sheet[i][3]
-                    our_uid = user_sheet[i][6]
-                    our_user_link = user_sheet[i][7]
-                    if out_uid is None or user_name is None:
-                        Common.logger(log_type, crawler).info("空行\n")
-                    else:
-                        Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
-                        if our_uid is None:
-                            try:
-                                out_user_info = cls.get_out_user_info(log_type, crawler, out_uid)
-                            except Exception as e:
-                                Common.logger(log_type, crawler).error(f"get_out_user_info异常:{e}\n")
-                                continue
-                            out_user_dict = {
-                                "out_uid": out_uid,
-                                "user_name": user_name,
-                                "out_avatar_url": out_user_info["out_avatar_url"],
-                                "out_create_time": '',
-                                "out_tag": '',
-                                "out_play_cnt": 0,
-                                "out_fans": out_user_info["out_fans"],
-                                "out_follow": out_user_info["out_follow"],
-                                "out_friend": 0,
-                                "out_like": out_user_info["out_like"],
-                                "platform": cls.platform,
-                                "tag": cls.tag,
-                            }
-                            our_user_dict = getUser.create_user(log_type=log_type, crawler=crawler,
-                                                                out_user_dict=out_user_dict, env=env, machine=machine)
-                            our_uid = our_user_dict['our_uid']
-                            our_user_link = our_user_dict['our_user_link']
-                            Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
-                                                 [[our_uid, our_user_link]])
-                            Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
-                            our_user_list.append(our_user_dict)
-                        else:
-                            our_user_dict = {
-                                'out_uid': out_uid,
-                                'user_name': user_name,
-                                'our_uid': our_uid,
-                                'our_user_link': our_user_link,
-                            }
-                            our_user_list.append(our_user_dict)
-                return our_user_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_user_id_from_feishu异常:{e}\n')
-
-    @classmethod
-    def random_signature(cls):
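-        # Builds a 26-char pseudo-signature: random digits/letters re-wrapped with a fixed
-        # 'AAAAAAAAAA' prefix and 'AAAB' suffix; the char at index 18 is always remapped to
-        # one of 'w' / 'x' / 'y' / 'z'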
-        src_digits = string.digits  # digits
-        src_uppercase = string.ascii_uppercase  # uppercase letters
-        src_lowercase = string.ascii_lowercase  # lowercase letters
-        digits_num = random.randint(1, 6)
-        uppercase_num = random.randint(1, 26 - digits_num - 1)
-        lowercase_num = 26 - (digits_num + uppercase_num)
-        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + random.sample(
-            src_lowercase, lowercase_num)
-        random.shuffle(password)
-        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
-        new_password_start = new_password[0:18]
-        new_password_end = new_password[-7:]
-        if new_password[18] == '8':
-            new_password = new_password_start + 'w' + new_password_end
-        elif new_password[18] == '9':
-            new_password = new_password_start + 'x' + new_password_end
-        elif new_password[18] == '-':
-            new_password = new_password_start + 'y' + new_password_end
-        elif new_password[18] == '.':
-            new_password = new_password_start + 'z' + new_password_end
-        else:
-            new_password = new_password_start + 'y' + new_password_end
-        return new_password
-
-    # @classmethod
-    # def get_signature(cls, log_type, crawler, out_uid, machine):
-    #     try:
-    #         # request/logging capabilities
-    #         ca = DesiredCapabilities.CHROME
-    #         ca["goog:loggingPrefs"] = {"performance": "ALL"}
-    #
-    #         # run headless (no visible browser window)
-    #         chrome_options = webdriver.ChromeOptions()
-    #         chrome_options.add_argument("--headless")
-    #         chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-    #         chrome_options.add_argument("--no-sandbox")
-    #
-    #         # initialize the driver
-    #         if machine == 'aliyun' or machine == 'aliyun_hk':
-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
-    #         elif machine == 'macpro':
-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
-    #                                       service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
-    #         elif machine == 'macair':
-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
-    #                                       service=Service('/Users/piaoquan/Downloads/chromedriver'))
-    #         else:
-    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
-    #         driver.implicitly_wait(10)
-    #         driver.get(f'https://www.ixigua.com/home/{out_uid}/')
-    #         time.sleep(3)
-    #         data_src = driver.find_elements(By.XPATH, '//img[@class="tt-img BU-MagicImage tt-img-loaded"]')[1].get_attribute("data-src")
-    #         signature = data_src.split("x-signature=")[-1]
-    #         return signature
-    #     except Exception as e:
-    #         Common.logger(log_type, crawler).error(f'get_signature异常:{e}\n')
-
-    # Fetch video details
-    @classmethod
-    def get_video_url(cls, log_type, crawler, gid):
-        try:
-            url = 'https://www.ixigua.com/api/mixVideo/information?'
-            headers = {
-                "accept-encoding": "gzip, deflate",
-                "accept-language": "zh-CN,zh-Hans;q=0.9",
-                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
-                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
-                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
-            }
-            params = {
-                'mixId': gid,
-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
-                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
-                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
-                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
-            }
-            cookies = {
-                'ixigua-a-s': '1',
-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
-                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
-                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
-                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
-                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
-                '__ac_nonce': '06304878000964fdad287',
-                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
-                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
-                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
-                '_tea_utm_cache_1300': 'undefined',
-                'support_avif': 'false',
-                'support_webp': 'false',
-                'xiguavideopcwebid': '7134967546256016900',
-                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
-            }
-            urllib3.disable_warnings()
-            s = requests.session()
-            # max_retries=3: retry each request up to 3 times
-            s.mount('http://', HTTPAdapter(max_retries=3))
-            s.mount('https://', HTTPAdapter(max_retries=3))
-            response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False,
-                             proxies=Common.tunnel_proxies(), timeout=5)
-            response.close()
-            if 'data' not in response.json() or response.json()['data'] == '':
-                Common.logger(log_type, crawler).warning('get_video_info: response: {}', response)
-            else:
-                video_info = response.json()['data']['gidInformation']['packerData']['video']
-
-                def pad_b64(b64_str):
-                    # restore the stripped base64 '=' padding before decoding
-                    # (keeps the original len % 3 heuristic unchanged)
-                    if len(b64_str) % 3 == 1:
-                        b64_str += '=='
-                    elif len(b64_str) % 3 == 2:
-                        b64_str += '='
-                    return b64_str
-
-                video_url_dict = {"video_url": '', "audio_url": '', "video_width": 0, "video_height": 0}
-                video_resource = video_info.get('videoResource', {})
-                # only the first resource group present is used, best group first
-                for resource_key in ['dash_120fps', 'dash', 'normal']:
-                    if resource_key not in video_resource:
-                        continue
-                    resource = video_resource[resource_key]
-                    video_list = resource.get('video_list', {})
-                    # prefer the muxed streams, highest quality first
-                    for quality in ['video_4', 'video_3', 'video_2', 'video_1']:
-                        if quality in video_list:
-                            stream = video_list[quality]
-                            # the source read the same backup_url_1 for both the
-                            # video_url and the audio_url of these muxed streams
-                            url = base64.b64decode(pad_b64(stream['backup_url_1'])).decode('utf8')
-                            video_url_dict = {"video_url": url,
-                                              "audio_url": url,
-                                              "video_width": stream['vwidth'],
-                                              "video_height": stream['vheight']}
-                            break
-                    else:
-                        # fall back to the separate dynamic video / audio tracks
-                        dynamic_video = resource.get('dynamic_video', {})
-                        if dynamic_video.get('dynamic_video_list') and dynamic_video.get('dynamic_audio_list'):
-                            video_stream = dynamic_video['dynamic_video_list'][-1]
-                            audio_stream = dynamic_video['dynamic_audio_list'][-1]
-                            # bug fix: pad video and audio urls independently; the
-                            # old if/elif chain skipped the audio padding whenever
-                            # the video url needed padding, and b64decode then raised
-                            video_url_dict = {
-                                "video_url": base64.b64decode(pad_b64(video_stream['backup_url_1'])).decode('utf8'),
-                                "audio_url": base64.b64decode(pad_b64(audio_stream['backup_url_1'])).decode('utf8'),
-                                "video_width": video_stream['vwidth'],
-                                "video_height": video_stream['vheight']}
-                    break
-
-                return video_url_dict
-
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_video_url:{e}\n')
-
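
For context, the `backup_url_1` values handled above are base64 strings with their `=` padding stripped; a minimal, self-contained sketch of the same pad-then-decode step (the sample string below is made up):

```python
import base64

def pad_b64(b64_str):
    # same len % 3 heuristic the method above uses to restore stripped padding
    if len(b64_str) % 3 == 1:
        b64_str += '=='
    elif len(b64_str) % 3 == 2:
        b64_str += '='
    return b64_str

# made-up sample: decodes to 'https://v.example.com/1.mp4'
encoded = 'aHR0cHM6Ly92LmV4YW1wbGUuY29tLzEubXA0'
print(base64.b64decode(pad_b64(encoded)).decode('utf8'))
```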
-    @classmethod
-    def get_videolist(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine):
-        try:
-            signature = cls.random_signature()
-            while True:
-                url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
-                params = {
-                    'to_user_id': str(out_uid),
-                    'offset': str(cls.offset),
-                    'limit': '30',
-                    'maxBehotTime': '0',
-                    'order': 'new',
-                    'isHome': '0',
-                    # 'msToken': 'G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==',
-                    # 'X-Bogus': 'DFSzswVuEkUANjW9ShFTgR/F6qHt',
-                    '_signature': signature,
-                }
-                headers = {
-                    'referer': f'https://www.ixigua.com/home/{out_uid}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
-                    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
-                }
-                urllib3.disable_warnings()
-                s = requests.session()
-                # max_retries=3: retry each request up to 3 times
-                s.mount('http://', HTTPAdapter(max_retries=3))
-                s.mount('https://', HTTPAdapter(max_retries=3))
-                response = s.get(url=url, headers=headers, params=params, proxies=Common.tunnel_proxies(), verify=False,
-                                 timeout=5)
-                response.close()
-                cls.offset += 30
-                if response.status_code != 200:
-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
-                    cls.offset = 0
-                    return
-                elif 'data' not in response.text:
-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
-                    cls.offset = 0
-                    return
-                elif not response.json()["data"]['videoList']:
-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
-                    cls.offset = 0
-                    return
-                else:
-                    videoList = response.json()['data']['videoList']
-                    for video_info in videoList:
-                        # missing fields fall back to 0, the sentinel the
-                        # validity check below relies on
-                        if 'title' not in video_info:
-                            video_title = 0
-                        else:
-                            video_title = video_info['title'].strip().replace('手游', '') \
-                                .replace('/', '').replace('\\/', '').replace('\n', '').replace('"', '').replace("'", '')
-                        video_id = video_info.get('video_id', 0)
-                        gid = video_info.get('gid', 0)
-                        play_cnt = video_info.get('video_detail_info', {}).get('video_watch_count', 0)
-                        comment_cnt = video_info.get('comment_count', 0)
-                        like_cnt = video_info.get('digg_count', 0)
-                        share_cnt = 0  # share count is not collected here
-                        video_duration = int(video_info.get('video_duration', 0))
-                        publish_time = video_info.get('publish_time', 0)
-                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time))
-                        is_top = video_info.get('is_top', 0)
-                        user_info = video_info.get('user_info', {})
-                        user_name = user_info.get('name', 0)
-                        user_id = user_info.get('user_id', 0)
-                        avatar_url = user_info.get('avatar_url', 0)
-                        # cover_url: prefer the single url, else the first entry of url_list
-                        detail_image = video_info.get('video_detail_info', {}).get('detail_video_large_image', {})
-                        if not detail_image:
-                            cover_url = 0
-                        elif 'url' in detail_image:
-                            cover_url = detail_image['url']
-                        else:
-                            cover_url = detail_image['url_list'][0]['url']
-
-                        Common.logger(log_type, crawler).info(f'---开始读取规则---')
-                        rule_dict = cls.get_rule(log_type, crawler)
-                        Common.logger(log_type, crawler).info(f'---读取规则完成---')
-
-                        if gid == 0 or video_id == 0 or cover_url == 0:
-                            Common.logger(log_type, crawler).info('无效视频\n')
-                        elif is_top is True and int(time.time()) - int(publish_time) > 3600 * 24 * rule_dict[
-                            'publish_time']:
-                            Common.logger(log_type, crawler).info(
-                                f'置顶视频,且发布时间:{publish_time_str} 超过{rule_dict["publish_time"]}天\n')
-                        elif int(time.time()) - int(publish_time) > 3600 * 24 * rule_dict['publish_time']:
-                            Common.logger(log_type, crawler).info(
-                                f'发布时间:{publish_time_str}超过{rule_dict["publish_time"]}天\n')
-                            cls.offset = 0
-                            return
-                        else:
-                            video_url_dict = cls.get_video_url(log_type, crawler, gid)
-                            video_url = video_url_dict["video_url"]
-                            audio_url = video_url_dict["audio_url"]
-                            video_width = video_url_dict["video_width"]
-                            video_height = video_url_dict["video_height"]
-
-                            video_dict = {'video_title': video_title,
-                                          'video_id': video_id,
-                                          'gid': gid,
-                                          'play_cnt': play_cnt,
-                                          'comment_cnt': comment_cnt,
-                                          'like_cnt': like_cnt,
-                                          'share_cnt': share_cnt,
-                                          'video_width': video_width,
-                                          'video_height': video_height,
-                                          'duration': video_duration,
-                                          'publish_time_stamp': publish_time,
-                                          'publish_time_str': publish_time_str,
-                                          'is_top': is_top,
-                                          'user_name': user_name,
-                                          'user_id': user_id,
-                                          'avatar_url': avatar_url,
-                                          'cover_url': cover_url,
-                                          'audio_url': audio_url,
-                                          'video_url': video_url,
-                                          'session': signature}
-                            for k, v in video_dict.items():
-                                Common.logger(log_type, crawler).info(f"{k}:{v}")
-                            cls.download_publish(log_type=log_type,
-                                                 crawler=crawler,
-                                                 video_dict=video_dict,
-                                                 rule_dict=rule_dict,
-                                                 strategy=strategy,
-                                                 our_uid=our_uid,
-                                                 oss_endpoint=oss_endpoint,
-                                                 env=env,
-                                                 machine=machine)
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_videolist:{e}\n")
-
-    @classmethod
-    def repeat_video(cls, log_type, crawler, video_id, env, machine):
-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
-        return len(repeat_video)
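
Note that this lookup, like the INSERT in `download_publish` below, splices values straight into the SQL text, so an id or title containing a quote breaks the statement. A hedged sketch of a parameterized variant, assuming the wrapper ultimately sits on a pymysql-style connection (the `MysqlHelper` internals are not shown in this file):

```python
import pymysql  # assumption: MysqlHelper wraps a pymysql-compatible connection

def repeat_video_count(conn, platform, video_id):
    # bound parameters: the driver escapes the values, so quotes in a
    # video_id can no longer break or alter the statement
    sql = "select count(*) from crawler_video where platform=%s and out_video_id=%s"
    with conn.cursor() as cursor:
        cursor.execute(sql, (platform, video_id))
        return cursor.fetchone()[0]

# usage sketch (connection parameters are placeholders):
# conn = pymysql.connect(host="127.0.0.1", user="crawler", password="...", database="crawler")
# print(repeat_video_count(conn, "西瓜视频", "v0300fg10000abc"))
```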
-
-    # download / upload
-    @classmethod
-    def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
-        filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
-        for filter_word in filter_words:
-            if filter_word in video_dict['video_title']:
-                Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
-                return
-        if cls.download_rule(video_dict, rule_dict) is False:
-            Common.logger(log_type, crawler).info('不满足抓取规则\n')
-        elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
-            Common.logger(log_type, crawler).info('视频已下载\n')
-        else:
-            # download the video stream
-            Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
-                                   title=video_dict['video_title'], url=video_dict['video_url'])
-            # download the audio stream
-            Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
-                                   title=video_dict['video_title'], url=video_dict['audio_url'])
-            # mux the audio and video tracks together
-            Common.video_compose(log_type=log_type, crawler=crawler,
-                                 video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
-            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-            try:
-                if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
-                    # remove the video folder
-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                    Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-                    return
-            except FileNotFoundError:
-                # remove the video folder
-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
-                return
-            # download the cover image
-            Common.download_method(log_type=log_type, crawler=crawler, text='cover',
-                                   title=video_dict['video_title'], url=video_dict['cover_url'])
-            # save the video metadata to a local txt file
-            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-            # upload the video
-            Common.logger(log_type, crawler).info("开始上传视频...")
-            our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                      crawler=crawler,
-                                                      strategy=strategy,
-                                                      our_uid=our_uid,
-                                                      env=env,
-                                                      oss_endpoint=oss_endpoint)
-            if env == 'dev':
-                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-            else:
-                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-            Common.logger(log_type, crawler).info("视频上传完成")
-
-            if our_video_id is None:
-                # remove the md5-named video folder (the same folder the size
-                # check above targets; the raw title is not the directory name)
-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                return
-
-            # write the video row to Feishu
-            Feishu.insert_columns(log_type, 'xigua', "e075e9", "ROWS", 1, 2)
-            upload_time = int(time.time())
-            values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                       "定向榜",
-                       video_dict['video_title'],
-                       str(video_dict['video_id']),
-                       our_video_link,
-                       video_dict['gid'],
-                       video_dict['play_cnt'],
-                       video_dict['comment_cnt'],
-                       video_dict['like_cnt'],
-                       video_dict['share_cnt'],
-                       video_dict['duration'],
-                       str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
-                       video_dict['publish_time_str'],
-                       video_dict['user_name'],
-                       video_dict['user_id'],
-                       video_dict['avatar_url'],
-                       video_dict['cover_url'],
-                       video_dict['video_url'],
-                       video_dict['audio_url']]]
-            time.sleep(1)
-            Feishu.update_values(log_type, 'xigua', "e075e9", "F2:Z2", values)
-            Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
-
-            # persist the video row to MySQL
-            insert_sql = f""" insert into crawler_video(video_id,
-                            user_id,
-                            out_user_id,
-                            platform,
-                            strategy,
-                            out_video_id,
-                            video_title,
-                            cover_url,
-                            video_url,
-                            duration,
-                            publish_time,
-                            play_cnt,
-                            crawler_rule,
-                            width,
-                            height)
-                            values({our_video_id},
-                            {our_uid},
-                            "{video_dict['user_id']}",
-                            "{cls.platform}",
-                            "定向爬虫策略",
-                            "{video_dict['video_id']}",
-                            "{video_dict['video_title']}",
-                            "{video_dict['cover_url']}",
-                            "{video_dict['video_url']}",
-                            {int(video_dict['duration'])},
-                            "{video_dict['publish_time_str']}",
-                            {int(video_dict['play_cnt'])},
-                            '{json.dumps(rule_dict)}',
-                            {int(video_dict['video_width'])},
-                            {int(video_dict['video_height'])}) """
-            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-            MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
-            Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
-
-    @classmethod
-    def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
-        user_list = get_user_from_mysql(log_type, crawler, crawler, env)
-        for user in user_list:
-            try:
-                spider_link = user["link"]
-                out_uid = spider_link.split('/')[-1]
-                user_name = user["nick_name"]
-                our_uid = user["uid"]
-
-                Common.logger(log_type, crawler).info(f"开始抓取 {user_name} 用户主页视频\n")
-                cls.get_videolist(log_type=log_type,
-                                  crawler=crawler,
-                                  strategy=strategy,
-                                  our_uid=our_uid,
-                                  out_uid=out_uid,
-                                  oss_endpoint=oss_endpoint,
-                                  env=env,
-                                  machine=machine)
-                cls.offset = 0
-            except Exception as e:
-                # log instead of silently swallowing the per-user failure
-                Common.logger(log_type, crawler).error(f"get_follow_videos:{e}\n")
-                continue
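
For reference, the user rows this loop consumes need at least the three keys read above; a made-up example (real rows come from `get_user_from_mysql`):

```python
user_list = [{
    "link": "https://www.ixigua.com/home/1234567890123456789",  # author homepage (made-up uid)
    "nick_name": "demo-author",                                 # display name used in logs
    "uid": 6001,                                                # site-side virtual account id
}]
```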
-
-
-if __name__ == '__main__':
-    Follow.get_follow_videos('follow', 'xigua', '定向抓取策略', 'inner', 'prod', 'aliyun')

+ 0 - 895
xigua/xigua_follow/xigua_follow_scheduling.py

@@ -1,895 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/17
-import base64
-import json
-import os
-import random
-import shutil
-import string
-import sys
-import time
-from datetime import date, timedelta
-from hashlib import md5
-
-import requests
-import urllib3
-from requests.adapters import HTTPAdapter
-
-sys.path.append(os.getcwd())
-from common.scheduling_db import MysqlHelper
-from common.common import Common
-from common.feishu import Feishu
-from common.publish import Publish
-from common.public import get_user_from_mysql, get_config_from_mysql, download_rule
-
-
-class ScheduleXiguaFollow:
-    # pagination offset for the author's video list
-    offset = 0
-    platform = "西瓜视频"
-
-    @classmethod
-    def download_rule(cls, video_info_dict, rule_dict):
-        # every metric must clear its rule minimum; width OR height is enough
-        return (video_info_dict['play_cnt'] >= rule_dict['play_cnt']['min']
-                and video_info_dict['comment_cnt'] >= rule_dict['comment_cnt']['min']
-                and video_info_dict['like_cnt'] >= rule_dict['like_cnt']['min']
-                and video_info_dict['duration'] >= rule_dict['duration']['min']
-                and (video_info_dict['video_width'] >= rule_dict['width']['min']
-                     or video_info_dict['video_height'] >= rule_dict['height']['min']))
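
For reference, the rule dict this method reads has one `{'min': ...}` entry per metric; a sketch with made-up thresholds, once the class is loaded:

```python
rule_dict = {
    'play_cnt': {'min': 500},
    'comment_cnt': {'min': 0},
    'like_cnt': {'min': 10},
    'duration': {'min': 40},
    'width': {'min': 0},
    'height': {'min': 0},
}
video_info_dict = {'play_cnt': 1200, 'comment_cnt': 3, 'like_cnt': 25,
                   'duration': 60, 'video_width': 720, 'video_height': 1280}
assert ScheduleXiguaFollow.download_rule(video_info_dict, rule_dict) is True
```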
-
-    @classmethod
-    def get_users(cls, log_type, crawler, task, env):
-        link_list = task['spider_link']
-        user_list = []
-        for link in link_list:
-            out_uid = int(link.split("https://www.ixigua.com/home/")[-1].replace("/", "").strip())
-            sql = f""" select * from crawler_author_map where spider_link="{link}" """
-            our_user_info = MysqlHelper.get_values(log_type=log_type, crawler=crawler, sql=sql, env=env)
-            if len(our_user_info) == 0:
-                our_uid = 0
-                Common.logger(log_type, crawler).info(f"没有站内虚拟账号: {link}\n")
-            else:
-                # print(type(our_user_info[0]))
-                # print(our_user_info[0])
-                our_uid = our_user_info[0]["media_id"]
-            user_dict = {
-                "out_uid": out_uid,
-                "our_uid": our_uid
-            }
-            user_list.append(user_dict)
-        Common.logger(log_type, crawler).info(f"user_list:{user_list}")
-        return user_list
-
-    # filter-word list (loaded from a Feishu sheet)
-    @classmethod
-    def filter_words(cls, log_type, crawler):
-        try:
-            while True:
-                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'KGB4Hc')
-                if filter_words_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
-                    time.sleep(10)  # actually wait the 10 seconds the message promises, instead of spinning
-                    continue
-                filter_words_list = []
-                for x in filter_words_sheet:
-                    for y in x:
-                        if y is None:
-                            pass
-                        else:
-                            filter_words_list.append(y)
-                return filter_words_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
-
-    @classmethod
-    def random_signature(cls):
-        src_digits = string.digits  # digit characters
-        src_uppercase = string.ascii_uppercase  # uppercase letters
-        src_lowercase = string.ascii_lowercase  # lowercase letters
-        digits_num = random.randint(1, 6)
-        uppercase_num = random.randint(1, 26 - digits_num - 1)
-        lowercase_num = 26 - (digits_num + uppercase_num)
-        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + random.sample(
-            src_lowercase, lowercase_num)
-        random.shuffle(password)
-        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
-        new_password_start = new_password[0:18]
-        new_password_end = new_password[-7:]
-        if new_password[18] == '8':
-            new_password = new_password_start + 'w' + new_password_end
-        elif new_password[18] == '9':
-            new_password = new_password_start + 'x' + new_password_end
-        elif new_password[18] == '-':
-            new_password = new_password_start + 'y' + new_password_end
-        elif new_password[18] == '.':
-            new_password = new_password_start + 'z' + new_password_end
-        else:
-            new_password = new_password_start + 'y' + new_password_end
-        return new_password
-
-    # fetch video details (resolve playable video / audio URLs)
-    @classmethod
-    def get_video_url(cls, log_type, crawler, gid):
-        try:
-            url = 'https://www.ixigua.com/api/mixVideo/information?'
-            headers = {
-                "accept-encoding": "gzip, deflate",
-                "accept-language": "zh-CN,zh-Hans;q=0.9",
-                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
-                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
-                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
-            }
-            params = {
-                'mixId': gid,
-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
-                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
-                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
-                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
-            }
-            cookies = {
-                'ixigua-a-s': '1',
-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
-                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
-                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
-                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
-                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
-                '__ac_nonce': '06304878000964fdad287',
-                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
-                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
-                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
-                '_tea_utm_cache_1300': 'undefined',
-                'support_avif': 'false',
-                'support_webp': 'false',
-                'xiguavideopcwebid': '7134967546256016900',
-                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
-            }
-            urllib3.disable_warnings()
-            s = requests.session()
-            # max_retries=3: retry each request up to 3 times
-            s.mount('http://', HTTPAdapter(max_retries=3))
-            s.mount('https://', HTTPAdapter(max_retries=3))
-            response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False,
-                             proxies=Common.tunnel_proxies(), timeout=5)
-            response.close()
-            if 'data' not in response.json() or response.json()['data'] == '':
-                Common.logger(log_type, crawler).warning('get_video_info: response: {}', response)
-            else:
-                video_info = response.json()['data']['gidInformation']['packerData']['video']
-                video_url_dict = {}
-                # video_url
-                if 'videoResource' not in video_info:
-                    video_url_dict["video_url"] = ''
-                    video_url_dict["audio_url"] = ''
-                    video_url_dict["video_width"] = 0
-                    video_url_dict["video_height"] = 0
-
-                elif 'dash_120fps' in video_info['videoResource']:
-                    if "video_list" in video_info['videoResource']['dash_120fps'] and 'video_4' in \
-                            video_info['videoResource']['dash_120fps']['video_list']:
-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        # pad the audio url in its own if-chain: as an elif of
-                        # the video checks above, this padding was skipped whenever
-                        # the video url needed padding, and b64decode then raised
-                        if len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vwidth']
-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_3' in \
-                            video_info['videoResource']['dash_120fps']['video_list']:
-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vwidth']
-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_2' in \
-                            video_info['videoResource']['dash_120fps']['video_list']:
-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vwidth']
-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_1' in \
-                            video_info['videoResource']['dash_120fps']['video_list']:
-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vwidth']
-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-
-                    elif 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
-                            and 'dynamic_video_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
-                            and 'dynamic_audio_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
-                            and len(
-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list']) != 0 \
-                            and len(
-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
-
-                        video_url = \
-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
-                                'backup_url_1']
-                        audio_url = \
-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1][
-                                'backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = \
-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
-                                'vwidth']
-                        video_height = \
-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
-                                'vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    else:
-                        video_url_dict["video_url"] = ''
-                        video_url_dict["audio_url"] = ''
-                        video_url_dict["video_width"] = 0
-                        video_url_dict["video_height"] = 0
-
-                elif 'dash' in video_info['videoResource']:
-                    if "video_list" in video_info['videoResource']['dash'] and 'video_4' in \
-                            video_info['videoResource']['dash']['video_list']:
-                        video_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash']['video_list']['video_4']['vwidth']
-                        video_height = video_info['videoResource']['dash']['video_list']['video_4']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_3' in \
-                            video_info['videoResource']['dash']['video_list']:
-                        video_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash']['video_list']['video_3']['vwidth']
-                        video_height = video_info['videoResource']['dash']['video_list']['video_3']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_2' in \
-                            video_info['videoResource']['dash']['video_list']:
-                        video_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash']['video_list']['video_2']['vwidth']
-                        video_height = video_info['videoResource']['dash']['video_list']['video_2']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_1' in \
-                            video_info['videoResource']['dash']['video_list']:
-                        video_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash']['video_list']['video_1']['vwidth']
-                        video_height = video_info['videoResource']['dash']['video_list']['video_1']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-
-                    elif 'dynamic_video' in video_info['videoResource']['dash'] \
-                            and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video'] \
-                            and 'dynamic_audio_list' in video_info['videoResource']['dash']['dynamic_video'] \
-                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list']) != 0 \
-                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list']) != 0:
-
-                        video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
-                            'backup_url_1']
-                        audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1][
-                            'backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
-                            'vwidth']
-                        video_height = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
-                            'vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    else:
-                        video_url_dict["video_url"] = ''
-                        video_url_dict["audio_url"] = ''
-                        video_url_dict["video_width"] = 0
-                        video_url_dict["video_height"] = 0
-
-                elif 'normal' in video_info['videoResource']:
-                    if "video_list" in video_info['videoResource']['normal'] and 'video_4' in \
-                            video_info['videoResource']['normal']['video_list']:
-                        video_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
-                        audio_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['normal']['video_list']['video_4']['vwidth']
-                        video_height = video_info['videoResource']['normal']['video_list']['video_4']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_3' in \
-                            video_info['videoResource']['normal']['video_list']:
-                        video_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
-                        audio_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['normal']['video_list']['video_3']['vwidth']
-                        video_height = video_info['videoResource']['normal']['video_list']['video_3']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_2' in \
-                            video_info['videoResource']['normal']['video_list']:
-                        video_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
-                        audio_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['normal']['video_list']['video_2']['vwidth']
-                        video_height = video_info['videoResource']['normal']['video_list']['video_2']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_1' in \
-                            video_info['videoResource']['normal']['video_list']:
-                        video_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
-                        audio_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['normal']['video_list']['video_1']['vwidth']
-                        video_height = video_info['videoResource']['normal']['video_list']['video_1']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-
-                    elif 'dynamic_video' in video_info['videoResource']['normal'] \
-                            and 'dynamic_video_list' in video_info['videoResource']['normal']['dynamic_video'] \
-                            and 'dynamic_audio_list' in video_info['videoResource']['normal']['dynamic_video'] \
-                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list']) != 0 \
-                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list']) != 0:
-
-                        video_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
-                            'backup_url_1']
-                        audio_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list'][-1][
-                            'backup_url_1']
-                        if len(video_url) % 3 == 1:
-                            video_url += '=='
-                        elif len(video_url) % 3 == 2:
-                            video_url += '='
-                        elif len(audio_url) % 3 == 1:
-                            audio_url += '=='
-                        elif len(audio_url) % 3 == 2:
-                            audio_url += '='
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
-                            'vwidth']
-                        video_height = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
-                            'vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    else:
-                        video_url_dict["video_url"] = ''
-                        video_url_dict["audio_url"] = ''
-                        video_url_dict["video_width"] = 0
-                        video_url_dict["video_height"] = 0
-
-                else:
-                    video_url_dict["video_url"] = ''
-                    video_url_dict["audio_url"] = ''
-                    video_url_dict["video_width"] = 0
-                    video_url_dict["video_height"] = 0
-
-                return video_url_dict
-
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_video_url:{e}\n')
-
-    @classmethod
-    def get_videolist(cls, log_type, crawler, strategy, task, our_uid, out_uid, oss_endpoint, env):
-        try:
-            signature = cls.random_signature()
-            while True:
-                url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
-                params = {
-                    'to_user_id': str(out_uid),
-                    'offset': str(cls.offset),
-                    'limit': '30',
-                    'maxBehotTime': '0',
-                    'order': 'new',
-                    'isHome': '0',
-                    # 'msToken': 'G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==',
-                    # 'X-Bogus': 'DFSzswVuEkUANjW9ShFTgR/F6qHt',
-                    '_signature': signature,
-                }
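-                # _signature comes from random_signature(); the endpoint appears to accept locally generated values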
-                headers = {
-                    'referer': f'https://www.ixigua.com/home/{out_uid}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
-                    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
-                }
-                urllib3.disable_warnings()
-                s = requests.session()
-                # max_retries=3: retry each request up to 3 times
-                s.mount('http://', HTTPAdapter(max_retries=3))
-                s.mount('https://', HTTPAdapter(max_retries=3))
-                response = s.get(url=url, headers=headers, params=params, proxies=Common.tunnel_proxies(), verify=False,
-                                 timeout=5)
-                response.close()
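-                # Paginate 30 items per request; offset is reset to 0 when the list ends or a request fails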
-                cls.offset += 30
-                if response.status_code != 200:
-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
-                    cls.offset = 0
-                    return
-                elif 'data' not in response.text:
-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
-                    cls.offset = 0
-                    return
-                elif not response.json()["data"]['videoList']:
-                    Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
-                    cls.offset = 0
-                    return
-                else:
-                    videoList = response.json()['data']['videoList']
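-                    # Extract each field defensively; the API may omit keys on some items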
-                    for i in range(len(videoList)):
-                        # video_title: default to '' (not 0) so downstream substring checks don't fail
-                        if 'title' not in videoList[i]:
-                            video_title = ''
-                        else:
-                            video_title = videoList[i]['title'].strip().replace('手游', '') \
-                                .replace('/', '').replace('\n', '').replace('"', '').replace("'", '')
-
-                        # video_id
-                        if 'video_id' not in videoList[i]:
-                            video_id = 0
-                        else:
-                            video_id = videoList[i]['video_id']
-
-                        # gid
-                        if 'gid' not in videoList[i]:
-                            gid = 0
-                        else:
-                            gid = videoList[i]['gid']
-
-                        # play_cnt
-                        if 'video_detail_info' not in videoList[i]:
-                            play_cnt = 0
-                        elif 'video_watch_count' not in videoList[i]['video_detail_info']:
-                            play_cnt = 0
-                        else:
-                            play_cnt = videoList[i]['video_detail_info']['video_watch_count']
-
-                        # comment_cnt
-                        if 'comment_count' not in videoList[i]:
-                            comment_cnt = 0
-                        else:
-                            comment_cnt = videoList[i]['comment_count']
-
-                        # like_cnt
-                        if 'digg_count' not in videoList[i]:
-                            like_cnt = 0
-                        else:
-                            like_cnt = videoList[i]['digg_count']
-
-                        # share_cnt: not taken from the response; default to 0
-                        share_cnt = 0
-
-                        # video_duration
-                        if 'video_duration' not in videoList[i]:
-                            video_duration = 0
-                        else:
-                            video_duration = int(videoList[i]['video_duration'])
-
-                        # send_time
-                        if 'publish_time' not in videoList[i]:
-                            publish_time = 0
-                        else:
-                            publish_time = videoList[i]['publish_time']
-
-                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time))
-
-                        # is_top
-                        if 'is_top' not in videoList[i]:
-                            is_top = 0
-                        else:
-                            is_top = videoList[i]['is_top']
-
-                        # user_name / user_id / avatar_url (string fields default to '')
-                        user_info = videoList[i].get('user_info', {})
-                        user_name = user_info.get('name', '')
-                        user_id = user_info.get('user_id', 0)
-                        avatar_url = user_info.get('avatar_url', '')
-
-                        # cover_url (guard every level; default to '')
-                        large_image = videoList[i].get('video_detail_info', {}).get('detail_video_large_image', {})
-                        if 'url' in large_image:
-                            cover_url = large_image['url']
-                        elif large_image.get('url_list'):
-                            cover_url = large_image['url_list'][0]['url']
-                        else:
-                            cover_url = ''
-                        # get_video_url() returns None on failure, so guard before indexing
-                        video_url_dict = cls.get_video_url(log_type, crawler, gid) or {}
-                        video_url = video_url_dict.get("video_url", '')
-                        audio_url = video_url_dict.get("audio_url", '')
-                        video_width = video_url_dict.get("video_width", 0)
-                        video_height = video_url_dict.get("video_height", 0)
-
-                        video_dict = {'video_title': video_title,
-                                      'video_id': video_id,
-                                      'gid': gid,
-                                      'play_cnt': play_cnt,
-                                      'comment_cnt': comment_cnt,
-                                      'like_cnt': like_cnt,
-                                      'share_cnt': share_cnt,
-                                      'video_width': video_width,
-                                      'video_height': video_height,
-                                      'duration': video_duration,
-                                      'publish_time_stamp': publish_time,
-                                      'publish_time_str': publish_time_str,
-                                      'is_top': is_top,
-                                      'user_name': user_name,
-                                      'user_id': user_id,
-                                      'avatar_url': avatar_url,
-                                      'cover_url': cover_url,
-                                      'audio_url': audio_url,
-                                      'video_url': video_url,
-                                      'session': signature}
-                        for k, v in video_dict.items():
-                            Common.logger(log_type, crawler).info(f"{k}:{v}")
-                        cls.download_publish(log_type=log_type,
-                                             crawler=crawler,
-                                             strategy=strategy,
-                                             video_dict=video_dict,
-                                             task=task,
-                                             our_uid=our_uid,
-                                             oss_endpoint=oss_endpoint,
-                                             env=env)
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_videolist:{e}\n")
-
-    @classmethod
-    def repeat_video(cls, log_type, crawler, video_id, env):
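-        # Returns the number of existing rows for this platform + out_video_id (0 = not yet downloaded)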
-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
-        return len(repeat_video)
-
-    # Download / upload
-    @classmethod
-    def download_publish(cls, log_type, crawler, strategy, video_dict, task, our_uid, oss_endpoint, env):
-        try:
-            filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
-            for filter_word in filter_words:
-                if filter_word in video_dict['video_title']:
-                    Common.logger(log_type, crawler).info('Title hit a filter word: {}\n', video_dict['video_title'])
-                    return
-            if download_rule(log_type, crawler, video_dict, task['rule_dict']) is False:
-                Common.logger(log_type, crawler).info('Does not meet the download rule\n')
-
-            elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
-                Common.logger(log_type, crawler).info('Video already downloaded\n')
-            else:
-                # Download the video track
-                Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
-                                       title=video_dict['video_title'], url=video_dict['video_url'])
-                # Download the audio track
-                Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
-                                       title=video_dict['video_title'], url=video_dict['audio_url'])
-                # Merge the audio and video tracks
-                Common.video_compose(log_type=log_type, crawler=crawler,
-                                     video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
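-                # Common.video_compose is assumed to mux the two tracks (e.g. via ffmpeg) into video.mp4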
-                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-                if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
-                    # Delete the video folder
-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                    Common.logger(log_type, crawler).info("Video size is 0, folder deleted\n")
-                    return
-                # ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
-                # if ffmpeg_dict is None or ffmpeg_dict['size'] == 0:
-                #     Common.logger(log_type, crawler).warning(f"Downloaded video is invalid, deleted\n")
-                #     # Delete the video folder
-                #     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-                #     return
-                # Download the cover image
-                Common.download_method(log_type=log_type, crawler=crawler, text='cover',
-                                       title=video_dict['video_title'], url=video_dict['cover_url'])
-                # Save video info to a txt file
-                Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-                # Upload the video
-                Common.logger(log_type, crawler).info("Start uploading video...")
-                our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                          crawler=crawler,
-                                                          strategy=strategy,
-                                                          our_uid=our_uid,
-                                                          env=env,
-                                                          oss_endpoint=oss_endpoint)
-                if env == 'dev':
-                    our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-                else:
-                    our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-                Common.logger(log_type, crawler).info("视频上传完成")
-
-                if our_video_id is None:
-                    # Delete the video folder
-                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-                    return
-
-                # Write the video row to Feishu
-                Feishu.insert_columns(log_type, 'xigua', "e075e9", "ROWS", 1, 2)
-                upload_time = int(time.time())
-                values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                           "定向榜",
-                           video_dict['video_title'],
-                           str(video_dict['video_id']),
-                           our_video_link,
-                           video_dict['gid'],
-                           video_dict['play_cnt'],
-                           video_dict['comment_cnt'],
-                           video_dict['like_cnt'],
-                           video_dict['share_cnt'],
-                           video_dict['duration'],
-                           str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
-                           video_dict['publish_time_str'],
-                           video_dict['user_name'],
-                           video_dict['user_id'],
-                           video_dict['avatar_url'],
-                           video_dict['cover_url'],
-                           video_dict['video_url'],
-                           video_dict['audio_url']]]
-                time.sleep(1)
-                Feishu.update_values(log_type, 'xigua', "e075e9", "F2:Z2", values)
-                Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
-
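-                # Snapshot the task's crawl rule so it is stored alongside the video row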
-                rule_dict = {
-                    "play_cnt": task["play_cnt"],
-                    "video_width": task["video_width"],
-                    "video_height": task["video_height"],
-                    "video_like": task["video_like"],
-                    "share_cnt": task["share_cnt"],
-                    "duration": {"min": task["duration_min"], "max": task["duration_max"]}
-                }
-
-                # Save video info to the database
-                insert_sql = f""" insert into crawler_video(video_id,
-                                user_id,
-                                out_user_id,
-                                platform,
-                                strategy,
-                                out_video_id,
-                                video_title,
-                                cover_url,
-                                video_url,
-                                duration,
-                                publish_time,
-                                play_cnt,
-                                crawler_rule,
-                                width,
-                                height)
-                                values({our_video_id},
-                                {our_uid},
-                                "{video_dict['user_id']}",
-                                "{cls.platform}",
-                                "定向爬虫策略",
-                                "{video_dict['video_id']}",
-                                "{video_dict['video_title']}",
-                                "{video_dict['cover_url']}",
-                                "{video_dict['video_url']}",
-                                {int(video_dict['duration'])},
-                                "{video_dict['publish_time_str']}",
-                                {int(video_dict['play_cnt'])},
-                                '{json.dumps(rule_dict)}',
-                                {int(video_dict['video_width'])},
-                                {int(video_dict['video_height'])}) """
-                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-                MysqlHelper.update_values(log_type, crawler, insert_sql, env)
-                Common.logger(log_type, crawler).info('Video info inserted into the database!\n')
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'download_publish exception: {e}\n')
-
-    @classmethod
-    def get_follow_videos(cls, log_type, crawler, task, oss_endpoint, env):
-        user_list = get_user_from_mysql(log_type, crawler, crawler, env)
-        strategy = '定向抓取策略'
-        for user in user_list:
-            try:
-                spider_link = user["link"]
-                out_uid = spider_link.split('/')[-1]
-                user_name = user["nick_name"]
-                our_uid = user["uid"]
-                Common.logger(log_type, crawler).info(f"开始抓取 {user_name} 用户主页视频\n")
-                cls.get_videolist(log_type=log_type,
-                                  crawler=crawler,
-                                  strategy=strategy,
-                                  task=task,
-                                  our_uid=our_uid,
-                                  out_uid=out_uid,
-                                  oss_endpoint=oss_endpoint,
-                                  env=env)
-                cls.offset = 0
-            except Exception as e:
-                Common.logger(log_type, crawler).error(f"get_follow_videos:{e}\n")
-
-
-if __name__ == '__main__':
-    # SchedulingFollow.get_users(log_type="follow",
-    #                            crawler="xigua",
-    #                            spider_rule="['https://www.ixigua.com/home/95420624045', 'https://www.ixigua.com/home/6431477489']",
-    #                            env="dev",
-    #                            machine="local")
-
-    print(ScheduleXiguaFollow.repeat_video("follow", "xigua", "v0201ag10000ce3jcjbc77u8jsplpgrg", "dev"))
-    pass

+ 0 - 43
xigua/xigua_main/run_xigua_author_scheduling.py

@@ -1,43 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/5/26
-import argparse
-import os
-import sys
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.public import task_fun
-from common.scheduling_db import MysqlHelper
-from xigua.xigua_author.xigua_author_scheduling import XiguaauthorScheduling
-
-
-def main(log_type, crawler, task, env):
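-    # task_fun() parses the task payload into task_dict and rule_dict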
-    task_dict = task_fun(task)['task_dict']
-    rule_dict = task_fun(task)['rule_dict']
-    task_id = task_dict['task_id']
-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
-    Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
-    Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
-    # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-    Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["task_name"]}\n')
-    XiguaauthorScheduling.get_author_videos(log_type=log_type,
-                                            crawler=crawler,
-                                            rule_dict=rule_dict,
-                                            user_list=user_list,
-                                            env=env)
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('Finished one round\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the argument parser
-    parser.add_argument('--log_type', default='recommend')  # add argument
-    parser.add_argument('--crawler', default='xigua')  # add argument
-    parser.add_argument('--task')  # add argument
-    parser.add_argument('--env', default='prod')  # add argument
-    args = parser.parse_args()  # parse args; values can also be passed from the terminal
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         task=args.task,
-         env=args.env)

+ 0 - 41
xigua/xigua_main/run_xigua_follow.py

@@ -1,41 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/17
-import argparse
-import os
-import sys
-import time
-
-sys.path.append(os.getcwd())
-from common.common import Common
-from xigua.xigua_follow.xigua_follow import Follow
-from common.feishu import Feishu
-
-
-def main(log_type, crawler, strategy, oss_endpoint, env, machine):
-    try:
-        Common.logger(log_type, crawler).info('Start crawling Xigua Video follow list\n')
-        Follow.get_follow_videos(log_type, crawler, strategy, oss_endpoint, env, machine)
-        Common.del_logs(log_type, crawler)
-        Common.logger(log_type, crawler).info('Finished one round, sleeping 1 minute\n')
-    except Exception as e:
-        Common.logger(log_type, crawler).info(f"Xigua Video exception, triggering alert: {e}\n")
-        Feishu.bot(log_type, crawler, f"{e}")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the argument parser
-    parser.add_argument('--log_type', default='follow', type=str)  # add argument, noting its type
-    parser.add_argument('--crawler', default='xigua')  # add argument
-    parser.add_argument('--strategy', default='定向抓取')  # add argument
-    parser.add_argument('--oss_endpoint', default='inner')  # add argument
-    parser.add_argument('--env', default='prod')  # add argument
-    parser.add_argument('--machine', default='aliyun')  # add argument
-    args = parser.parse_args()  # parse args; values can also be passed from the terminal
-    # print(args)
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         strategy=args.strategy,
-         oss_endpoint=args.oss_endpoint,
-         env=args.env,
-         machine=args.machine)

+ 0 - 30
xigua/xigua_main/run_xigua_recommend.py

@@ -1,30 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/4/11
-import argparse
-import os
-import sys
-
-sys.path.append(os.getcwd())
-from common.common import Common
-from xigua.xigua_recommend.xigua_recommend import XiguaRecommend
-
-
-def main(log_type, crawler, env):
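-    # dev runs outside Aliyun, so use the public OSS endpoint; prod uses the internal one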
-    if env == "dev":
-        oss_endpoint = "out"
-    else:
-        oss_endpoint = "inner"
-    Common.logger(log_type, crawler).info('Start crawling Xigua Video recommend feed\n')
-    XiguaRecommend.get_videoList(log_type, crawler, oss_endpoint, env)
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('Finished one round, sleeping 1 minute\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the argument parser
-    parser.add_argument('--log_type', type=str)  # add argument, noting its type
-    parser.add_argument('--crawler')  # add argument
-    parser.add_argument('--env')  # add argument
-    args = parser.parse_args()  # parse args; values can also be passed from the terminal
-    main(log_type=args.log_type, crawler=args.crawler, env=args.env)

+ 0 - 48
xigua/xigua_main/run_xigua_recommend_scheduling.py

@@ -1,48 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/5/25
-import argparse
-import os
-import random
-import sys
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.public import task_fun
-from common.scheduling_db import MysqlHelper
-from xigua.xigua_recommend.xigua_recommend_scheduling import XiguarecommendScheduling
-
-
-def main(log_type, crawler, task, env):
-    task_dict = task_fun(task)['task_dict']
-    rule_dict = task_fun(task)['rule_dict']
-    task_id = task_dict['task_id']
-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
-    our_uid_list = []
-    for user in user_list:
-        our_uid_list.append(user["uid"])
-    our_uid = random.choice(our_uid_list)
-    Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
-    Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
-    # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-    Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["task_name"]}\n')
-    XiguarecommendScheduling.get_videoList(log_type=log_type,
-                                           crawler=crawler,
-                                           rule_dict=rule_dict,
-                                           our_uid=our_uid,
-                                           env=env)
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('Crawl task finished\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the argument parser
-    parser.add_argument('--log_type', default='recommend')  # add argument
-    parser.add_argument('--crawler', default='xigua')  # add argument
-    parser.add_argument('--task')  # add argument
-    parser.add_argument('--env', default='prod')  # add argument
-    args = parser.parse_args()  # parse args; values can also be passed from the terminal
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         task=args.task,
-         env=args.env)

+ 0 - 42
xigua/xigua_main/run_xigua_search.py

@@ -1,42 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: lierqiang
-# @Time: 2023/3/13
-import argparse
-import os
-import sys
-import time
-
-sys.path.append(os.getcwd())
-from common.common import Common
-from xigua.xigua_search.xigua_search import XiguaSearch
-from common.feishu import Feishu
-
-
-def main(log_type, crawler, strategy, oss_endpoint, env, machine):
-    try:
-        Common.logger(log_type, crawler).info('Start crawling Xigua Video search\n')
-        XiguaSearch.get_search_videos(log_type, crawler, strategy, oss_endpoint, env, machine)
-        Common.del_logs(log_type, crawler)
-        Common.logger(log_type, crawler).info('Finished one round, sleeping 1 minute\n')
-    except Exception as e:
-        Common.logger(log_type, crawler).info(f"Xigua Video exception, triggering alert: {e}\n")
-        Feishu.bot(log_type, crawler, f"{e}")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the argument parser
-    parser.add_argument('--log_type', type=str)  # add argument, noting its type
-    parser.add_argument('--crawler')  # add argument
-    parser.add_argument('--strategy')  # add argument
-    parser.add_argument('--our_uid')  # add argument
-    parser.add_argument('--oss_endpoint')  # add argument
-    parser.add_argument('--env')  # add argument
-    parser.add_argument('--machine')  # add argument
-    args = parser.parse_args()  # parse args; values can also be passed from the terminal
-    # print(args)
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         strategy=args.strategy,
-         oss_endpoint=args.oss_endpoint,
-         env=args.env,
-         machine=args.machine)

+ 0 - 28
xigua/xigua_main/run_xigua_search_new.py

@@ -1,28 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/5/12
-import argparse
-import os
-import sys
-sys.path.append(os.getcwd())
-from common.common import Common
-from xigua.xigua_search.xigua_search_new import XiguaSearchNew
-
-
-def main(log_type, crawler, env):
-    Common.logger(log_type, crawler).info('Start crawling Xigua Video search strategy\n')
-    XiguaSearchNew.get_search_videos(log_type, crawler, env)
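-    # Clean up any leftover Chrome / chromedriver processes spawned by selenium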
-    os.system("ps aux | grep Chrome | grep -v grep | awk '{print $2}' | xargs kill -9")
-    os.system("ps aux | grep chromedriver | grep -v grep | awk '{print $2}' | xargs kill -9")
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('Finished one round\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the argument parser
-    parser.add_argument('--log_type', type=str)  # add argument, noting its type
-    parser.add_argument('--crawler')  # add argument
-    parser.add_argument('--env')  # add argument
-    args = parser.parse_args()  # parse args; values can also be passed from the terminal
-    # print(args)
-    main(log_type=args.log_type, crawler=args.crawler, env=args.env)

+ 0 - 45
xigua/xigua_main/run_xigua_search_scheduling.py

@@ -1,45 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/5/26
-import argparse
-import os
-import sys
-sys.path.append(os.getcwd())
-from common.common import Common
-from common.public import task_fun
-from common.scheduling_db import MysqlHelper
-from xigua.xigua_search.xigua_search_scheduling import XiguasearchScheduling
-
-
-def main(log_type, crawler, task, env):
-    task_dict = task_fun(task)['task_dict']
-    rule_dict = task_fun(task)['rule_dict']
-    task_id = task_dict['task_id']
-    select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
-    user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
-    Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
-    Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
-    # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-    Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["task_name"]}\n')
-    XiguasearchScheduling.get_search_videos(log_type=log_type,
-                                            crawler=crawler,
-                                            rule_dict=rule_dict,
-                                            user_list=user_list,
-                                            env=env)
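-    # Clean up any leftover Chrome / chromedriver processes spawned by selenium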
-    os.system("ps aux | grep Chrome | grep -v grep | awk '{print $2}' | xargs kill -9")
-    os.system("ps aux | grep chromedriver | grep -v grep | awk '{print $2}' | xargs kill -9")
-    Common.del_logs(log_type, crawler)
-    Common.logger(log_type, crawler).info('Finished one round\n')
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()  # create the argument parser
-    parser.add_argument('--log_type', default='recommend')  # add argument
-    parser.add_argument('--crawler', default='xigua')  # add argument
-    parser.add_argument('--task')  # add argument
-    parser.add_argument('--env', default='prod')  # add argument
-    args = parser.parse_args()  # parse args; values can also be passed from the terminal
-    main(log_type=args.log_type,
-         crawler=args.crawler,
-         task=args.task,
-         env=args.env)

+ 0 - 850
xigua/xigua_recommend/xigua_recommend.py

@@ -1,850 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/4/7
-import base64
-import json
-import os
-import random
-import shutil
-import string
-import sys
-import time
-from datetime import date, timedelta
-from hashlib import md5
-
-import requests
-import urllib3
-from requests.adapters import HTTPAdapter
-from selenium import webdriver
-from selenium.webdriver import DesiredCapabilities
-from selenium.webdriver.chrome.service import Service
-
-sys.path.append(os.getcwd())
-from common.publish import Publish
-from common.common import Common
-from common.feishu import Feishu
-from common.scheduling_db import MysqlHelper
-
-
-class XiguaRecommend:
-    platform = "西瓜视频"
-
-    @classmethod
-    def xigua_config(cls, log_type, crawler, text, env):
-        select_sql = f"""select * from crawler_config where source="xigua" """
-        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
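-        # Each config is a dict literal whose "title" / "filter" values are comma-separated word lists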
-        title_list = []
-        filter_list = []
-        for content in contents:
-            config = content['config']
-            config_dict = eval(config)
-            for k, v in config_dict.items():
-                if k == "title":
-                    title_list_config = v.split(",")
-                    for title in title_list_config:
-                        title_list.append(title)
-                if k == "filter":
-                    filter_list_config = v.split(",")
-                    for filter_word in filter_list_config:
-                        filter_list.append(filter_word)
-        if text == "title":
-            return title_list
-        elif text == "filter":
-            return filter_list
-
-    @classmethod
-    def download_rule(cls, video_dict):
-        publish_time_str_rule = (date.today() + timedelta(days=-30)).strftime("%Y-%m-%d %H:%M:%S")
-        publish_time_stamp_rule = int(time.mktime(time.strptime(publish_time_str_rule, "%Y-%m-%d %H:%M:%S")))
-        # play_cnt >= 10000, duration 1-30 minutes, published within the last 30 days
-        return (int(video_dict['play_cnt']) >= 10000
-                and 60 <= int(video_dict['duration']) <= 60 * 30
-                and int(video_dict['publish_time_stamp']) >= publish_time_stamp_rule)
-
-    @classmethod
-    def random_signature(cls):
-        src_digits = string.digits  # digits
-        src_uppercase = string.ascii_uppercase  # uppercase letters
-        src_lowercase = string.ascii_lowercase  # lowercase letters
-        digits_num = random.randint(1, 6)
-        uppercase_num = random.randint(1, 26 - digits_num - 1)
-        lowercase_num = 26 - (digits_num + uppercase_num)
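-        # 26 random characters total; the result is wrapped below to mimic the shape of a real _signature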
-        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + random.sample(
-            src_lowercase, lowercase_num)
-        random.shuffle(password)
-        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
-        new_password_start = new_password[0:18]
-        new_password_end = new_password[-7:]
-        if new_password[18] == '8':
-            new_password = new_password_start + 'w' + new_password_end
-        elif new_password[18] == '9':
-            new_password = new_password_start + 'x' + new_password_end
-        elif new_password[18] == '-':
-            new_password = new_password_start + 'y' + new_password_end
-        elif new_password[18] == '.':
-            new_password = new_password_start + 'z' + new_password_end
-        else:
-            new_password = new_password_start + 'y' + new_password_end
-        return new_password
-
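The fabricated token's length and affixes are fixed by construction (10-char `AAAAAAAAAA` prefix, 4-char `AAAB` suffix, 26 chars total); a tiny demonstration, with only the middle section varying per run:

```python
sig = XiguaRecommend.random_signature()
print(len(sig), sig.startswith("AAAAAAAAAA"), sig.endswith("AAAB"))  # -> 26 True True
```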
-    @classmethod
-    def get_signature(cls, log_type, crawler, env):
-        try:
-            # enable Chrome performance (network) logging
-            ca = DesiredCapabilities.CHROME
-            ca["goog:loggingPrefs"] = {"performance": "ALL"}
-
-            # run Chrome headless
-            chrome_options = webdriver.ChromeOptions()
-            chrome_options.add_argument("headless")
-            chrome_options.add_argument(
-                f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-            chrome_options.add_argument("--no-sandbox")
-
-            # initialise the webdriver
-            if env == "dev":
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
-                                          service=Service('/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
-            else:
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
-
-            driver.implicitly_wait(10)
-            driver.get('https://www.ixigua.com/')
-            time.sleep(1)
-
-            # scroll down 2,000 px so the page fires the feed request carrying _signature
-            driver.execute_script('window.scrollBy(0, 2000)')
-            driver.refresh()
-            logs = driver.get_log("performance")
-            driver.quit()
-            for line in logs:
-                msg = json.loads(line['message'])
-                if 'params' not in msg['message']:
-                    pass
-                elif 'documentURL' not in msg['message']['params']:
-                    pass
-                elif 'www.ixigua.com' not in msg['message']['params']['documentURL']:
-                    pass
-                elif 'url' not in msg['message']['params']['request']:
-                    pass
-                elif '_signature' not in msg['message']['params']['request']['url']:
-                    pass
-                else:
-                    url = msg['message']['params']['request']['url']
-                    signature = url.split('_signature=')[-1].split('&')[0]
-                    return signature
-
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_signature异常:{e}\n')
-
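The `desired_capabilities` argument used above was removed in Selenium 4; a sketch of the equivalent setup under that assumption (selenium>=4, Chrome 109+ for `--headless=new`):

```python
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument("--headless=new")
options.add_argument("--no-sandbox")
# performance logging now goes through set_capability instead of DesiredCapabilities
options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
driver = webdriver.Chrome(options=options)
driver.get("https://www.ixigua.com/")
logs = driver.get_log("performance")  # same list-of-dict shape parsed above
driver.quit()
```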
-    # fetch video details for a given gid
-    @classmethod
-    def get_video_url(cls, log_type, crawler, gid):
-        try:
-            url = 'https://www.ixigua.com/api/mixVideo/information?'
-            headers = {
-                "accept-encoding": "gzip, deflate",
-                "accept-language": "zh-CN,zh-Hans;q=0.9",
-                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
-                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
-                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
-            }
-            params = {
-                'mixId': gid,
-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
-                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
-                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
-                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
-            }
-            cookies = {
-                'ixigua-a-s': '1',
-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
-                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
-                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
-                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
-                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
-                '__ac_nonce': '06304878000964fdad287',
-                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
-                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
-                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
-                '_tea_utm_cache_1300': 'undefined',
-                'support_avif': 'false',
-                'support_webp': 'false',
-                'xiguavideopcwebid': '7134967546256016900',
-                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
-            }
-            urllib3.disable_warnings()
-            s = requests.session()
-            # retry up to 3 times on connection errors
-            s.mount('http://', HTTPAdapter(max_retries=3))
-            s.mount('https://', HTTPAdapter(max_retries=3))
-            response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False,
-                             proxies=Common.tunnel_proxies(), timeout=5)
-            response.close()
-            if 'data' not in response.json() or response.json()['data'] == '':
-                Common.logger(log_type, crawler).warning('get_video_info: response: {}', response)
-            else:
-                video_info = response.json()['data']['gidInformation']['packerData']['video']
-                # default to empty values when no usable resource is found
-                video_url_dict = {"video_url": '', "audio_url": '', "video_width": 0, "video_height": 0}
-
-                def pad_b64(url):
-                    # padded base64 is always a multiple of 4 chars; the API strips the '='.
-                    # (the original elif-chain used % 3 and padded only one of the two URLs)
-                    return url + '=' * (-len(url) % 4)
-
-                video_resource = video_info.get('videoResource', {})
-                # the three container formats, in the order the original elif-chain tried them
-                for fmt in ('dash_120fps', 'dash', 'normal'):
-                    if fmt not in video_resource:
-                        continue
-                    resource = video_resource[fmt]
-                    video_list = resource.get('video_list', {})
-                    # prefer the highest-quality stream: video_4 down to video_1
-                    for quality in ('video_4', 'video_3', 'video_2', 'video_1'):
-                        if quality in video_list:
-                            item = video_list[quality]
-                            # the feed exposes a single muxed stream here, so the audio URL
-                            # is the same backup_url_1 as the video URL
-                            video_url_dict["video_url"] = base64.b64decode(pad_b64(item['backup_url_1'])).decode('utf8')
-                            video_url_dict["audio_url"] = video_url_dict["video_url"]
-                            video_url_dict["video_width"] = item['vwidth']
-                            video_url_dict["video_height"] = item['vheight']
-                            break
-                    else:
-                        # fall back to separate dynamic video / audio tracks
-                        dynamic = resource.get('dynamic_video', {})
-                        video_items = dynamic.get('dynamic_video_list', [])
-                        audio_items = dynamic.get('dynamic_audio_list', [])
-                        if video_items and audio_items:
-                            video_url_dict["video_url"] = base64.b64decode(pad_b64(video_items[-1]['backup_url_1'])).decode('utf8')
-                            video_url_dict["audio_url"] = base64.b64decode(pad_b64(audio_items[-1]['backup_url_1'])).decode('utf8')
-                            video_url_dict["video_width"] = video_items[-1]['vwidth']
-                            video_url_dict["video_height"] = video_items[-1]['vheight']
-                    break
-
-                return video_url_dict
-
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_video_url:{e}\n')
-
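A minimal, runnable demonstration of the padding repair used in `get_video_url` above (the feed apparently strips the trailing `=` from its base64-encoded URLs, and `b64decode` rejects bad padding):

```python
import base64

def pad_b64(s: str) -> str:
    return s + "=" * (-len(s) % 4)  # padded base64 length is always a multiple of 4

encoded = base64.b64encode(b"https://example.com/video.mp4").decode().rstrip("=")
print(base64.b64decode(pad_b64(encoded)).decode())  # -> https://example.com/video.mp4
```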
-    @classmethod
-    def repeat_video(cls, log_type, crawler, video_id, env):
-        sql = f""" select * from crawler_video where platform="西瓜视频" and out_video_id="{video_id}"; """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
-        return len(repeat_video)
-
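`repeat_video` interpolates `video_id` straight into the SQL string; a parameterized sketch under the assumption of a DB-API style cursor (the MysqlHelper used here only accepts a raw SQL string):

```python
def repeat_video_count(cursor, video_id: str) -> int:
    """Count existing rows for this platform/video pair without string interpolation."""
    cursor.execute(
        "select count(*) from crawler_video where platform=%s and out_video_id=%s",
        ("西瓜视频", video_id),
    )
    return cursor.fetchone()[0]
```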
-    @classmethod
-    def get_videoList(cls, log_type, crawler, oss_endpoint, env):
-        queryCount = 1
-        while True:  # page through the feed; every error branch below returns
-            signature = cls.get_signature(log_type, crawler, env)
-            if signature is None:
-                Common.logger(log_type, crawler).warning(f"signature:{signature}")
-                time.sleep(10)  # back off instead of hammering get_signature in a tight loop
-                continue
-            url = "https://www.ixigua.com/api/feedv2/feedById?"
-            params = {
-                "channelId": "94349543909",
-                "count": "9",
-                "maxTime": str(int(time.time())),
-                # "maxTime": "1683190690",
-                "queryCount": str(queryCount),
-                "_signature": signature,
-                "request_from": "701",
-                "offset": "0",
-                "referrer:": "https://open.weixin.qq.com/",
-                "aid": "1768",
-                "msToken": "XDpSA6_ZPP-gAkkBV-_WRQvNpG20uUUGPwf3E-S-txhznjBcXNbK2sbOuSpF3U7Jki6R9HwLDPeW4Gj7n6PURPTKrKLEs8J-ieFrwXDvMp2DX94ZoMua",
-                # "X-Bogus": "DFSzswVOx7bANt0TtCAcOFm4pIkR",
-            }
-            headers = {
-                'referer': 'https://www.ixigua.com/',
-                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
-                'authority': 'www.ixigua.com',
-                'accept': 'application/json, text/plain, */*',
-                'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
-                'cache-control': 'no-cache',
-                # 'cookie': 'ttcid=5d8f917a525e46759dc886296bf1111b69; MONITOR_WEB_ID=ad1c8360-d4c9-4fa2-a801-d9fd68dfc1b2; s_v_web_id=verify_lh8vaa6v_VI4RQ0ET_nVbq_4PXw_8mfN_7Xp6wdLOZi08; passport_csrf_token=0e7c6992cb6170c9db034c3696191fff; passport_csrf_token_default=0e7c6992cb6170c9db034c3696191fff; odin_tt=b102690fef38bf07c400e3c69cdc27627701802bdd816fa827e3721c33607c4d2c0cbef09fe99c7d370e4a9e9e11c263; sid_guard=8dec4ecbe52cbdcff99dafe622b586b4%7C1683189144%7C3024002%7CThu%2C+08-Jun-2023+08%3A32%3A26+GMT; uid_tt=1dccbeaf685e24afd018fec335f3151d; uid_tt_ss=1dccbeaf685e24afd018fec335f3151d; sid_tt=8dec4ecbe52cbdcff99dafe622b586b4; sessionid=8dec4ecbe52cbdcff99dafe622b586b4; sessionid_ss=8dec4ecbe52cbdcff99dafe622b586b4; sid_ucp_v1=1.0.0-KGVhZTIxYjFlNzRlZTNhZjk5MjNlNzk2NGRhOWJlYzZiNGI5NzBhMzYKFQiu3d-eqQIQmNvNogYYGCAMOAhACxoCaGwiIDhkZWM0ZWNiZTUyY2JkY2ZmOTlkYWZlNjIyYjU4NmI0; ssid_ucp_v1=1.0.0-KGVhZTIxYjFlNzRlZTNhZjk5MjNlNzk2NGRhOWJlYzZiNGI5NzBhMzYKFQiu3d-eqQIQmNvNogYYGCAMOAhACxoCaGwiIDhkZWM0ZWNiZTUyY2JkY2ZmOTlkYWZlNjIyYjU4NmI0; support_webp=true; support_avif=true; csrf_session_id=9dd5d8287d4f075ae24ff163cd22e51f; msToken=XDpSA6_ZPP-gAkkBV-_WRQvNpG20uUUGPwf3E-S-txhznjBcXNbK2sbOuSpF3U7Jki6R9HwLDPeW4Gj7n6PURPTKrKLEs8J-ieFrwXDvMp2DX94ZoMua; ixigua-a-s=1; tt_scid=UTduWO4ij7cX6YKx23sDuV4zjvFkGFtFk5ZBhEnd1lJ1EZBykStzU7tbWQOSzGdE0fc6; ttwid=1%7C4zaTJmlaHpEa8rAB-KjREdxT3sNBUJWrAzRJnNvqExQ%7C1683198318%7Cffc2eef612caab19a0db93b4cec27e21a6230f9b82ab4bf5b1c6193d082baab1',
-                'pragma': 'no-cache',
-                'sec-ch-ua': '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"',
-                'sec-ch-ua-mobile': '?0',
-                'sec-ch-ua-platform': '"macOS"',
-                'sec-fetch-dest': 'empty',
-                'sec-fetch-mode': 'cors',
-                'sec-fetch-site': 'same-origin',
-                # 'tt-anti-token': '95Ny0vj4Q-90dd9b91193b34ce554cc2861439b9629d897723f4d33719b9747d7d18a2ff7c',
-                # 'x-secsdk-csrf-token': '000100000001ecb8f07e247a89e289b3ab55f3c967a8e88f88aa0addb1ddca9d3e36f35d7999175be79b8699c881'
-            }
-            urllib3.disable_warnings()
-            s = requests.session()
-            # retry up to 3 times; the request must go through the session object,
-            # otherwise the mounted retry adapters are silently bypassed
-            s.mount('http://', HTTPAdapter(max_retries=3))
-            s.mount('https://', HTTPAdapter(max_retries=3))
-            response = s.get(url=url, headers=headers, params=params, proxies=Common.tunnel_proxies(),
-                             verify=False, timeout=5)
-            response.close()
-            queryCount += 1
-            Common.logger(log_type, crawler).info(f"queryCount:{queryCount}")
-            if response.status_code != 200:
-                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
-                return
-            elif 'data' not in response.text:
-                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
-                return
-            elif 'channelFeed' not in response.json()['data']:
-                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
-                return
-            elif 'Data' not in response.json()['data']['channelFeed']:
-                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
-                return
-            elif len(response.json()['data']['channelFeed']['Data']) == 0:
-                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
-                return
-            else:
-                videoList = response.json()['data']['channelFeed']['Data']
-                for feed_item in videoList:
-                    if 'data' not in feed_item:
-                        continue
-                    item = feed_item['data']
-                    video_title = item.get('title', '').replace('"', '').replace("'", '')
-                    if video_title == '':
-                        # fall back to a random title from the crawler config
-                        video_title = random.choice(cls.xigua_config(log_type, crawler, "title", env))
-                    video_id = item.get('vid', '')
-                    play_cnt = int(item.get('playNum', 0))
-                    comment_cnt = int(item.get('commentNum', 0))
-                    gid = item.get('item_id', 0)
-                    # the feed API does not expose share / like counts
-                    share_cnt = 0
-                    like_cnt = 0
-                    duration = int(item.get('duration', 0))
-                    publish_time_stamp = int(item.get('publish_time', 0))
-                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-                    cover_url = item.get('image_url', '')
-                    user_name = item['user_info'].get('name', '')
-                    user_id = item['user_info'].get('user_id', '')
-                    avatar_url = item['user_info'].get('avatar_url', '')
-
-                    video_dict = {
-                        'video_title': video_title,
-                        'video_id': video_id,
-                        'gid': gid,
-                        'play_cnt': play_cnt,
-                        'comment_cnt': comment_cnt,
-                        'like_cnt': like_cnt,
-                        'share_cnt': share_cnt,
-                        'duration': duration,
-                        'publish_time_stamp': publish_time_stamp,
-                        'publish_time_str': publish_time_str,
-                        'user_name': user_name,
-                        'user_id': user_id,
-                        'avatar_url': avatar_url,
-                        'cover_url': cover_url,
-                        'session': signature
-                    }
-                    for k, v in video_dict.items():
-                        Common.logger(log_type, crawler).info(f"{k}:{v}")
-
-                    if gid == 0 or video_id == '' or cover_url == '':
-                        Common.logger(log_type, crawler).info('无效视频\n')
-                    elif cls.download_rule(video_dict) is False:
-                        Common.logger(log_type, crawler).info('不满足抓取规则\n')
-                    elif any(word in video_title for word in cls.xigua_config(log_type, crawler, "filter", env)):
-                        Common.logger(log_type, crawler).info('已中过滤词\n')
-                    elif cls.repeat_video(log_type, crawler, video_id, env) != 0:
-                        Common.logger(log_type, crawler).info('视频已下载\n')
-                    else:
-                        video_url_dict = cls.get_video_url(log_type, crawler, gid)
-                        video_dict['video_url'] = video_url_dict["video_url"]
-                        video_dict["audio_url"] = video_url_dict["audio_url"]
-                        video_dict["video_width"] = video_url_dict["video_width"]
-                        video_dict["video_height"] = video_url_dict["video_height"]
-
-                        cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
-
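`get_videoList` above bails out on any malformed feed response through five near-identical guards; a compact equivalent, offered as a sketch rather than the original helper:

```python
def extract_feed(payload: dict):
    """Return the non-empty feed list, or None when the response shape is unexpected."""
    feed = ((payload.get("data") or {}).get("channelFeed") or {}).get("Data") or []
    return feed if feed else None
```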
-    @classmethod
-    def download_publish(cls, log_type, crawler, video_dict, oss_endpoint, env):
-        # download the video track
-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video', title=video_dict['video_title'],
-                               url=video_dict['video_url'])
-        # download the audio track
-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio', title=video_dict['video_title'],
-                               url=video_dict['audio_url'])
-        # mux the downloaded audio and video tracks into one file
-        Common.video_compose(log_type=log_type, crawler=crawler,
-                             video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
-        # NOTE: the size check below looks in the md5-of-title folder while video_compose
-        # above was given the raw-title folder; one of the two paths appears inconsistent
-        md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-        if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
-            shutil.rmtree(f"./{crawler}/videos/{md_title}")  # remove the zero-byte download
-            Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-            return
-        # download the cover image
-        Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'],
-                               url=video_dict['cover_url'])
-        # save the video metadata to a local txt file
-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-        # upload and publish the video
-        Common.logger(log_type, crawler).info("开始上传视频...")
-        our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                  crawler=crawler,
-                                                  strategy="推荐榜爬虫策略",
-                                                  our_uid="recommend",
-                                                  env=env,
-                                                  oss_endpoint=oss_endpoint)
-        if env == 'dev':
-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        else:
-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        Common.logger(log_type, crawler).info("视频上传完成")
-
-        if our_video_id is None:
-            # upload failed: remove the local video folder
-            # (note: raw-title path here vs the md5-of-title path used in the size check)
-            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-            return
-
-        # append the video record to the Feishu sheet
-        Feishu.insert_columns(log_type, 'xigua', "1iKGF1", "ROWS", 1, 2)
-        upload_time = int(time.time())
-        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                   "推荐榜爬虫策略",
-                   video_dict['video_title'],
-                   str(video_dict['video_id']),
-                   our_video_link,
-                   video_dict['gid'],
-                   video_dict['play_cnt'],
-                   video_dict['comment_cnt'],
-                   video_dict['like_cnt'],
-                   video_dict['share_cnt'],
-                   video_dict['duration'],
-                   str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
-                   video_dict['publish_time_str'],
-                   video_dict['user_name'],
-                   video_dict['user_id'],
-                   video_dict['avatar_url'],
-                   video_dict['cover_url'],
-                   video_dict['audio_url'],
-                   video_dict['video_url']]]
-        time.sleep(1)
-        Feishu.update_values(log_type, 'xigua', "1iKGF1", "F2:Z2", values)
-        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
-
-        rule_dict = {
-            "play_cnt": {"min": 10000},
-            "duration": {"min": 60, "max": 60*30},
-            "publish_day": {"min": 30}
-        }
-
-        # persist the record to MySQL (values are interpolated directly into the SQL;
-        # the quote-stripping of the title above keeps the statement parseable)
-        insert_sql = f""" insert into crawler_video(video_id,
-                                        user_id,
-                                        out_user_id,
-                                        platform,
-                                        strategy,
-                                        out_video_id,
-                                        video_title,
-                                        cover_url,
-                                        video_url,
-                                        duration,
-                                        publish_time,
-                                        play_cnt,
-                                        crawler_rule,
-                                        width,
-                                        height)
-                                        values({our_video_id},
-                                        {int(50322238)},
-                                        "{video_dict['user_id']}",
-                                        "{cls.platform}",
-                                        "推荐榜爬虫策略",
-                                        "{video_dict['video_id']}",
-                                        "{video_dict['video_title']}",
-                                        "{video_dict['cover_url']}",
-                                        "{video_dict['video_url']}",
-                                        {int(video_dict['duration'])},
-                                        "{video_dict['publish_time_str']}",
-                                        {int(video_dict['play_cnt'])},
-                                        '{json.dumps(rule_dict)}',
-                                        {int(video_dict['video_width'])},
-                                        {int(video_dict['video_height'])}) """
-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
-        Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
-
-
-if __name__ == "__main__":
-    # XiguaRecommend.get_signature("recommend", "xigua", "dev")
-    # XiguaRecommend.get_videoList("recommend", "xigua", "dev")
-    # print(XiguaRecommend.get_video_url("recommend", "xigua", "7218171653242094139"))
-    # print(XiguaRecommend.filter_words("recommend", "xigua"))
-    print(XiguaRecommend.xigua_config("recommend", "xigua", "title", "dev"))
-    pass

+ 0 - 959
xigua/xigua_search/xigua_search.py

@@ -1,959 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/17
-import base64
-import json
-import os
-import random
-import shutil
-import string
-import sys
-import time
-from hashlib import md5
-
-import requests
-import urllib3
-from urllib.parse import quote
-from requests.adapters import HTTPAdapter
-
-
-sys.path.append(os.getcwd())
-from common.db import MysqlHelper
-from common.getuser import getUser
-from common.common import Common
-from common.feishu import Feishu
-from common.publish import Publish
-from common.public import get_config_from_mysql
-from common.userAgent import get_random_user_agent, get_random_header
-
-
-class XiguaSearch:
-    platform = "西瓜视频"
-    tag = "西瓜视频爬虫,搜索爬虫策略"
-
-    @classmethod
-    def get_rule(cls, log_type, crawler):
-        try:
-            while True:
-                rule_sheet = Feishu.get_values_batch(log_type, crawler, "shxOl7")
-                if rule_sheet is None:
-                    Common.logger(log_type, crawler).warning("rule_sheet is None! 10秒后重新获取")
-                    time.sleep(10)
-                    continue
-                rule_dict = {
-                    "play_cnt": int(rule_sheet[1][2]),
-                    "min_duration": int(rule_sheet[2][2]),
-                    "max_duration": int(rule_sheet[3][2]),
-                    "publish_time": int(rule_sheet[4][2]),
-                }
-                return rule_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_rule:{e}\n")
-
-    # download rule
-    @classmethod
-    def download_rule(cls, video_info_dict, rule_dict):
-        # NOTE: the keys read here (comment_cnt / like_cnt / duration / video_width /
-        # video_height) do not match what get_rule above produces (play_cnt /
-        # min_duration / max_duration / publish_time); callers must pass a rule_dict
-        # with the keys used below. See the defensive sketch after this method.
-        return (video_info_dict['play_cnt'] >= rule_dict['play_cnt']
-                and video_info_dict['comment_cnt'] >= rule_dict['comment_cnt']
-                and video_info_dict['like_cnt'] >= rule_dict['like_cnt']
-                and video_info_dict['duration'] >= rule_dict['duration']
-                and (video_info_dict['video_width'] >= rule_dict['video_width']
-                     or video_info_dict['video_height'] >= rule_dict['video_height']))
-
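Given the key mismatch flagged in the comment above, a defensive sketch that treats any missing threshold as 0 (i.e. unconstrained):

```python
def passes_rules(info: dict, rules: dict) -> bool:
    counters = ("play_cnt", "comment_cnt", "like_cnt", "duration")
    size_ok = (info["video_width"] >= rules.get("video_width", 0)
               or info["video_height"] >= rules.get("video_height", 0))
    return size_ok and all(info[k] >= rules.get(k, 0) for k in counters)
```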
-    # filter-word lexicon, read from a Feishu sheet
-    @classmethod
-    def filter_words(cls, log_type, crawler):
-        try:
-            while True:
-                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'KGB4Hc')
-                if filter_words_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
-                    time.sleep(10)  # the warning promises a 10-second retry, so actually wait
-                    continue
-                # flatten the sheet into one list, skipping empty cells
-                return [cell for row in filter_words_sheet for cell in row if cell is not None]
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
-
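Several loops in this class poll Feishu inside a bare `while True`; a bounded-retry sketch (the helper name is hypothetical) that gives up instead of spinning forever:

```python
import time

def fetch_with_retry(fetch, attempts: int = 6, delay: float = 10.0):
    """Call fetch() until it returns a value, sleeping `delay` seconds between tries."""
    for _ in range(attempts):
        result = fetch()
        if result is not None:
            return result
        time.sleep(delay)
    raise TimeoutError(f"no data after {attempts} attempts")
```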
-    # fetch user info as a list of dicts. Note: user_id may be int or str; the machine argument is unused
-    @classmethod
-    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
-        try:
-            while True:
-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
-                if user_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
-                    time.sleep(10)  # match the retry interval promised in the warning
-                    continue
-                our_user_list = []
-                for i in range(1, len(user_sheet)):
-                    our_uid = user_sheet[i][6]
-                    search_word = user_sheet[i][4]
-                    tag1 = user_sheet[i][8]
-                    tag2 = user_sheet[i][9]
-                    tag3 = user_sheet[i][10]
-                    tag4 = user_sheet[i][11]
-                    tag5 = user_sheet[i][12]
-                    tag6 = user_sheet[i][13]
-                    tag7 = user_sheet[i][14]
-                    Common.logger(log_type, crawler).info(f"正在更新 {search_word} 关键词信息\n")
-                    if our_uid is None:
-                        default_user = getUser.get_default_user()
-                        # profile used to create the our_uid
-                        user_dict = {
-                            'recommendStatus': -6,
-                            'appRecommendStatus': -6,
-                            'nickName': default_user['nickName'],
-                            'avatarUrl': default_user['avatarUrl'],
-                            'tagName': f'{tag1},{tag2},{tag3},{tag4},{tag5},{tag6},{tag7}',
-                        }
-                        our_uid = getUser.create_uid(log_type, crawler, user_dict, env)
-                        # log after creation: before this call our_uid was still None
-                        Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
-                        if env == 'prod':
-                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
-                        else:
-                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
-                        Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
-                                             [[our_uid, our_user_link]])
-                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
-                    our_user_dict = {
-                        'out_uid': '',
-                        'search_word': search_word,
-                        'our_uid': our_uid,
-                        'our_user_link': f'https://admin.piaoquantv.com/ums/user/{our_uid}/post',
-                    }
-                    our_user_list.append(our_user_dict)
-
-                return our_user_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_user_id_from_feishu异常:{e}\n')
-
-    @classmethod
-    def random_signature(cls):
-        src_digits = string.digits  # digits
-        src_uppercase = string.ascii_uppercase  # upper-case letters
-        src_lowercase = string.ascii_lowercase  # lower-case letters
-        digits_num = random.randint(1, 6)
-        uppercase_num = random.randint(1, 26 - digits_num - 1)
-        lowercase_num = 26 - (digits_num + uppercase_num)
-        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + random.sample(
-            src_lowercase, lowercase_num)
-        random.shuffle(password)
-        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
-        new_password_start = new_password[0:18]
-        new_password_end = new_password[-7:]
-        if new_password[18] == '8':
-            new_password = new_password_start + 'w' + new_password_end
-        elif new_password[18] == '9':
-            new_password = new_password_start + 'x' + new_password_end
-        elif new_password[18] == '-':
-            new_password = new_password_start + 'y' + new_password_end
-        elif new_password[18] == '.':
-            new_password = new_password_start + 'z' + new_password_end
-        else:
-            new_password = new_password_start + 'y' + new_password_end
-        return new_password
-
-    # fetch video details for a given gid
-    @classmethod
-    def get_video_url(cls, log_type, crawler, gid):
-        try:
-            url = 'https://www.ixigua.com/api/mixVideo/information?'
-            headers = {
-                "accept-encoding": "gzip, deflate",
-                "accept-language": "zh-CN,zh-Hans;q=0.9",
-                "user-agent": get_random_user_agent('pc'),
-                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
-            }
-            params = {
-                'mixId': gid,
-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
-                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
-                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
-                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
-            }
-            cookies = {
-                'ixigua-a-s': '1',
-                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
-                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
-                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
-                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
-                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
-                '__ac_nonce': '06304878000964fdad287',
-                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
-                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
-                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
-                '_tea_utm_cache_1300': 'undefined',
-                'support_avif': 'false',
-                'support_webp': 'false',
-                'xiguavideopcwebid': '7134967546256016900',
-                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
-            }
-            urllib3.disable_warnings()
-            s = requests.session()
-            # retry each request up to 3 times
-            s.mount('http://', HTTPAdapter(max_retries=3))
-            s.mount('https://', HTTPAdapter(max_retries=3))
-            response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False,
-                             proxies=Common.tunnel_proxies(), timeout=5)
-            # response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False)
-            response.close()
-            if 'data' not in response.json() or response.json()['data'] == '':
-                Common.logger(log_type, crawler).warning(f'get_video_url: response: {response.text}')
-            else:
-                video_info = response.json()['data']['gidInformation']['packerData']['video']
-                video_url_dict = {}
-                # video_url
-                if 'videoResource' not in video_info:
-                    video_url_dict["video_url"] = ''
-                    video_url_dict["audio_url"] = ''
-                    video_url_dict["video_width"] = 0
-                    video_url_dict["video_height"] = 0
-
-                elif 'dash_120fps' in video_info['videoResource']:
-                    if "video_list" in video_info['videoResource']['dash_120fps'] and 'video_4' in \
-                            video_info['videoResource']['dash_120fps']['video_list']:
-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
-                        # pad each Base64 string independently to a multiple of 4
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vwidth']
-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_3' in \
-                            video_info['videoResource']['dash_120fps']['video_list']:
-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vwidth']
-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_2' in \
-                            video_info['videoResource']['dash_120fps']['video_list']:
-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vwidth']
-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_1' in \
-                            video_info['videoResource']['dash_120fps']['video_list']:
-                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vwidth']
-                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-
-                    elif 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
-                            and 'dynamic_video_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
-                            and 'dynamic_audio_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
-                            and len(
-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list']) != 0 \
-                            and len(
-                        video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
-
-                        video_url = \
-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
-                                'backup_url_1']
-                        audio_url = \
-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1][
-                                'backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = \
-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
-                                'vwidth']
-                        video_height = \
-                            video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
-                                'vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    else:
-                        video_url_dict["video_url"] = ''
-                        video_url_dict["audio_url"] = ''
-                        video_url_dict["video_width"] = 0
-                        video_url_dict["video_height"] = 0
-
-                elif 'dash' in video_info['videoResource']:
-                    if "video_list" in video_info['videoResource']['dash'] and 'video_4' in \
-                            video_info['videoResource']['dash']['video_list']:
-                        video_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash']['video_list']['video_4']['vwidth']
-                        video_height = video_info['videoResource']['dash']['video_list']['video_4']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_3' in \
-                            video_info['videoResource']['dash']['video_list']:
-                        video_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash']['video_list']['video_3']['vwidth']
-                        video_height = video_info['videoResource']['dash']['video_list']['video_3']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_2' in \
-                            video_info['videoResource']['dash']['video_list']:
-                        video_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash']['video_list']['video_2']['vwidth']
-                        video_height = video_info['videoResource']['dash']['video_list']['video_2']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['dash'] and 'video_1' in \
-                            video_info['videoResource']['dash']['video_list']:
-                        video_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
-                        audio_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash']['video_list']['video_1']['vwidth']
-                        video_height = video_info['videoResource']['dash']['video_list']['video_1']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-
-                    elif 'dynamic_video' in video_info['videoResource']['dash'] \
-                            and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video'] \
-                            and 'dynamic_audio_list' in video_info['videoResource']['dash']['dynamic_video'] \
-                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list']) != 0 \
-                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list']) != 0:
-
-                        video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
-                            'backup_url_1']
-                        audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1][
-                            'backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
-                            'vwidth']
-                        video_height = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
-                            'vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    else:
-                        video_url_dict["video_url"] = ''
-                        video_url_dict["audio_url"] = ''
-                        video_url_dict["video_width"] = 0
-                        video_url_dict["video_height"] = 0
-
-                elif 'normal' in video_info['videoResource']:
-                    if "video_list" in video_info['videoResource']['normal'] and 'video_4' in \
-                            video_info['videoResource']['normal']['video_list']:
-                        video_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
-                        audio_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['normal']['video_list']['video_4']['vwidth']
-                        video_height = video_info['videoResource']['normal']['video_list']['video_4']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_3' in \
-                            video_info['videoResource']['normal']['video_list']:
-                        video_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
-                        audio_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['normal']['video_list']['video_3']['vwidth']
-                        video_height = video_info['videoResource']['normal']['video_list']['video_3']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_2' in \
-                            video_info['videoResource']['normal']['video_list']:
-                        video_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
-                        audio_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['normal']['video_list']['video_2']['vwidth']
-                        video_height = video_info['videoResource']['normal']['video_list']['video_2']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    elif "video_list" in video_info['videoResource']['normal'] and 'video_1' in \
-                            video_info['videoResource']['normal']['video_list']:
-                        video_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
-                        audio_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['normal']['video_list']['video_1']['vwidth']
-                        video_height = video_info['videoResource']['normal']['video_list']['video_1']['vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-
-                    elif 'dynamic_video' in video_info['videoResource']['normal'] \
-                            and 'dynamic_video_list' in video_info['videoResource']['normal']['dynamic_video'] \
-                            and 'dynamic_audio_list' in video_info['videoResource']['normal']['dynamic_video'] \
-                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list']) != 0 \
-                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list']) != 0:
-
-                        video_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
-                            'backup_url_1']
-                        audio_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list'][-1][
-                            'backup_url_1']
-                        video_url += '=' * (-len(video_url) % 4)
-                        audio_url += '=' * (-len(audio_url) % 4)
-                        video_url = base64.b64decode(video_url).decode('utf8')
-                        audio_url = base64.b64decode(audio_url).decode('utf8')
-                        video_width = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
-                            'vwidth']
-                        video_height = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
-                            'vheight']
-                        video_url_dict["video_url"] = video_url
-                        video_url_dict["audio_url"] = audio_url
-                        video_url_dict["video_width"] = video_width
-                        video_url_dict["video_height"] = video_height
-                    else:
-                        video_url_dict["video_url"] = ''
-                        video_url_dict["audio_url"] = ''
-                        video_url_dict["video_width"] = 0
-                        video_url_dict["video_height"] = 0
-
-                else:
-                    video_url_dict["video_url"] = ''
-                    video_url_dict["audio_url"] = ''
-                    video_url_dict["video_width"] = 0
-                    video_url_dict["video_height"] = 0
-
-                return video_url_dict
-
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_video_url:{e}\n')
-
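-    # Hypothetical refactor (not part of the original file): every padding chain in
-    # get_video_url reduces to topping a Base64 string up to the multiple-of-4
-    # length that b64decode requires.
-    @staticmethod
-    def pad_b64(s):
-        # '=' * (-len(s) % 4) appends 0-3 padding characters
-        return s + '=' * (-len(s) % 4)
-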
-    @classmethod
-    def get_video_info(cls, log_type, crawler, item_id):
-        d_url = "http://a6.pstatp.com/article/full/11/1/{video_id}/{video_id}/1/0/?iid=3636030325&device_id=5787057242" \
-                "&ac=wifi&channel=wandoujia&aid=13&app_name=news_article&version_code=532&version_name=5.3.2&device_platform" \
-                "=android&ab_client=a1%2Cc2%2Ce1%2Cf2%2Cg2%2Cb3%2Cf4&abflag=3&ssmix=a&device_type=SM705" \
-                "&device_brand=smartisan&os_api=19&os_version=4.4.2&uuid=864593021012562&openudid=e23a5ff037ef2d1a" \
-                "&manifest_version_code=532&resolution=1080*1920&dpi=480&update_version_code=5320".format(
-            video_id=item_id)
-        res = requests.get(url=d_url, headers=get_random_header('pc'), proxies=Common.tunnel_proxies())
-        data = json.loads(res.text)['data']
-        item_counter = data['h5_extra']['itemCell']['itemCounter']
-        user_info = data['user_info']
-        detail_info = data['video_detail_info']
-        video_dict = {'video_title': data['title'].replace('"', '').replace("'", ''),
-                      'video_id': detail_info['video_id'],
-                      'gid': data['group_id'],
-                      'play_cnt': item_counter['videoWatchCount'],
-                      'comment_cnt': item_counter['commentCount'],
-                      'like_cnt': item_counter['diggCount'],
-                      'share_cnt': item_counter['shareCount'],
-
-                      'duration': data['video_duration'],
-                      'publish_time_stamp': data['publish_time'],
-                      'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S",
-                                                        time.localtime(data['publish_time'])),
-                      'user_name': user_info['name'],
-                      'user_id': user_info['user_id'],
-                      'avatar_url': user_info['avatar_url'],
-                      'cover_url': data['large_image']['url'].replace('\u0026', '&'),
-
-                      }
-        return video_dict
-
-    @classmethod
-    def is_ruled(cls, log_type, crawler, video_dict, rule_dict):
-        old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
-        if video_dict['publish_time_stamp'] <= old_time:
-            return False
-        elif video_dict['play_cnt'] <= rule_dict['play_cnt']:
-            return False
-        elif video_dict['duration'] < rule_dict['min_duration'] or video_dict['duration'] > rule_dict['max_duration']:
-            return False
-        else:
-            return True
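-
-    # Illustration (made-up numbers): with rule_dict = {'publish_time': 3,
-    # 'play_cnt': 1000, 'min_duration': 60, 'max_duration': 600}, a video published
-    # 2 days ago with 5000 plays and a 120s duration passes all three checks.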
-
-    @classmethod
-    def get_videolist(cls, log_type, crawler, strategy, our_uid, search_word, oss_endpoint, env, machine):
-        total_count = 1
-        offset = 0
-        while True:
-
-            signature = cls.random_signature()
-            # url = "https://www.ixigua.com/api/searchv2/complex/{}/{}?order_type=publish_time&click_position=new".format(
-            #     quote(search_word), offset, signature)
-            url = f'https://www.ixigua.com/api/searchv2/complex/{quote(search_word)}/{offset}?' \
-                  f'search_id=202305111126371489381ECEC7FE277E3F&' \
-                  f'aid=1768&' \
-                  f'msToken=lPfIf3aps6EktQAeOl9yRgnL44MtMeGt2WnHjahIR0IysASB_zdhGiY0J9WWxNDpLd7aVdQx_36MpyPI5f2zRUHFYyNNsX5cl-or6GkiVuLLiRsU3ylxj9vt7Upubw==&' \
-                  f'X-Bogus=DFSzswVY4h0ANGD7tC7G/Mm4pIkV&' \
-                  f'_signature={signature}'
-
-            headers = {
-                'referer': 'https://www.ixigua.com/search/{}/?logTag=594535e3690f17a88cdb&tab_name=search'.format(
-                    quote(search_word)),
-                'cookie': 'ttcid=5d8f917a525e46759dc886296bf1111b69; MONITOR_WEB_ID=ad1c8360-d4c9-4fa2-a801-d9fd68dfc1b2; s_v_web_id=verify_lh8vaa6v_VI4RQ0ET_nVbq_4PXw_8mfN_7Xp6wdLOZi08; passport_csrf_token=0e7c6992cb6170c9db034c3696191fff; passport_csrf_token_default=0e7c6992cb6170c9db034c3696191fff; support_webp=true; support_avif=true; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; odin_tt=3072e827705bd5aa707fb8d432524d7f8fad972b02b31a2d3458a3e5209d5492; sid_guard=46a52ce83dacb0b871dae675476a3e42%7C1683773717%7C21600%7CThu%2C+11-May-2023+08%3A55%3A17+GMT; uid_tt=4126f296856e6042f195253e9a01c4cb; uid_tt_ss=4126f296856e6042f195253e9a01c4cb; sid_tt=46a52ce83dacb0b871dae675476a3e42; sessionid=46a52ce83dacb0b871dae675476a3e42; sessionid_ss=46a52ce83dacb0b871dae675476a3e42; sid_ucp_v1=1.0.0-KDMyMzg5NWI3YzAxMGFkN2Y4MjZiMzE5Njc0MGFmMWQ5NGExY2MyYzgKCBCVsvGiBhgNGgJobCIgNDZhNTJjZTgzZGFjYjBiODcxZGFlNjc1NDc2YTNlNDI; ssid_ucp_v1=1.0.0-KDMyMzg5NWI3YzAxMGFkN2Y4MjZiMzE5Njc0MGFmMWQ5NGExY2MyYzgKCBCVsvGiBhgNGgJobCIgNDZhNTJjZTgzZGFjYjBiODcxZGFlNjc1NDc2YTNlNDI; ixigua-a-s=1; tt_scid=sblZQP6nSw2f6A.XS-yHFqB.R3o9UFsRTUCKAoWlHWzNrOf8R01qeIBbu6TDeXtMa3fb; ttwid=1%7C4zaTJmlaHpEa8rAB-KjREdxT3sNBUJWrAzRJnNvqExQ%7C1683775619%7Cf4fc6fa51baf2e302242da412ead6500c3d3f5bfb0be6253cbae00301d5773ae; msToken=lPfIf3aps6EktQAeOl9yRgnL44MtMeGt2WnHjahIR0IysASB_zdhGiY0J9WWxNDpLd7aVdQx_36MpyPI5f2zRUHFYyNNsX5cl-or6GkiVuLLiRsU3ylxj9vt7Upubw==',
-                'user-agent': get_random_user_agent('pc'),
-            }
-            try:
-                proxies = Common.tunnel_proxies()
-                s = requests.session()
-                # retry each request up to 3 times
-                s.mount('http://', HTTPAdapter(max_retries=3))
-                s.mount('https://', HTTPAdapter(max_retries=3))
-                res = s.request("GET", url, headers=headers, proxies=proxies, timeout=5)
-                # Common.logger(log_type, crawler).info(f"proxies:{proxies}\n")
-                Common.logger(log_type, crawler).info(f"get_videolist:{res.json()}\n")
-                search_list = res.json()['data']['data']
-            except Exception as e:
-                Common.logger(log_type, crawler).warning(f"get_videolist:{e}\n")
-                continue
-            if not search_list:
-                Common.logger(log_type, crawler).error(f'search word: {search_word}, no video list returned, offset: {offset}')
-                return
-            for video_info in search_list:
-                v_type = video_info['type']
-                rule_dict = cls.get_rule(log_type, crawler)
-                publish_time = video_info['data']['publish_time']
-                old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
-                if publish_time <= old_time:
-                    Common.logger(log_type, crawler).error(f'search word: {search_word}, reached the publish-time cutoff, stop crawling\n')
-                    return
-
-                if v_type == 'video':
-                    item_id = video_info['data']['group_id']
-                    if video_info['data']['publish_time'] <= old_time:
-                        Common.logger(log_type, crawler).error(f'search word: {search_word}, video: {item_id}, fails the crawl rules\n')
-                        continue
-                    elif video_info['data']['video_watch_count'] <= rule_dict['play_cnt']:
-                        Common.logger(log_type, crawler).error(f'search word: {search_word}, video: {item_id}, fails the crawl rules\n')
-                        continue
-                    elif video_info['data']['video_time'] < rule_dict['min_duration'] or video_info['data'][
-                        'video_time'] > rule_dict['max_duration']:
-                        Common.logger(log_type, crawler).error(f'search word: {search_word}, video: {item_id}, fails the crawl rules\n')
-                        continue
-
-                    try:
-                        video_dict = cls.get_video_info(log_type, crawler, item_id)
-                        filter_words = get_config_from_mysql(log_type, crawler, env, text='filter')
-                        is_filter = False
-                        for filter_word in filter_words:
-                            if filter_word in video_dict['video_title']:
-                                is_filter = True
-                                break
-                        if is_filter:
-                            Common.logger(log_type, crawler).info(f'Title hit a filter word: {video_dict["video_title"]}\n')
-                            continue
-                        video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
-                        video_dict['video_width'] = video_url_dict["video_width"]
-                        video_dict['video_height'] = video_url_dict["video_height"]
-                        video_dict['audio_url'] = video_url_dict["audio_url"]
-                        video_dict['video_url'] = video_url_dict["video_url"]
-                        video_dict['session'] = signature
-                    except Exception as e:
-                        Common.logger(log_type, crawler).error(
-                            f'search word: {search_word}, video: {item_id}, failed to fetch details: {e}')
-                        continue
-
-                    if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
-                        Common.logger(log_type, crawler).info(
-                            f'search word: {search_word}, gid: {video_dict["gid"]}, already downloaded, skipping\n')
-                        continue
-                    for k, v in video_dict.items():
-                        Common.logger(log_type, crawler).info(f"{k}:{v}")
-
-                    try:
-                        # print(
-                        #     f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
-                        cls.download_publish(
-                            search_word=search_word,
-                            log_type=log_type,
-                            crawler=crawler,
-                            video_dict=video_dict,
-                            rule_dict=rule_dict,
-                            strategy=strategy,
-                            our_uid=our_uid,
-                            oss_endpoint=oss_endpoint,
-                            env=env,
-                            machine=machine
-                        )
-
-                    except Exception as e:
-                        Common.logger(log_type, crawler).error(f'search word: {search_word}, video: {item_id}, download failed: {e}')
-                        continue
-                    total_count += 1
-                    Common.logger(log_type, crawler).info(
-                        f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
-                    if total_count >= 30:
-                        return
-                # elif v_type == 'pseries':
-                #     try:
-                #         item_id = video_info['data']['group_id']
-                #         p_url = "https://www.ixigua.com/api/videov2/pseries_more_v2?pSeriesId={}&rank=0&tailCount=30&aid=1768&msToken=wHEafKFLx0k3hihOPbhXYNsfMBxWiq2AB0K5R-34kEFixyq3ATi_DuXbL4Q47J9C2uK2zgWItMa1g2yc4FyDxM4dMijmSdwF4c4T8sSmOkoOI0wGzeEcPw==&X-Bogus=DFSzswVOzdUANG3ItaVHYr7TlqCv&_signature=_02B4Z6wo00001vB6l3QAAIDBZKzMeTihTmbwepPAANgh1Ai3JgFFo4e6anoezmBEpHfEMEYlWISGhXI-QKfev4N-2bwgXsHOuNGLnOsGqMbANIjFPh7Yj6OakQWrkbACenlv0P-arswtB6Zn45".format(
-                #             item_id)
-                #         p_headers = {
-                #             'referer': 'https://www.ixigua.com/{}?series_flow=1&logTag=cfec9d927da968feff89'.format(
-                #                 item_id),
-                #             'user-agent': get_random_user_agent('pc'),
-                #         }
-                #         p_res = requests.request("GET", p_url, headers=p_headers,
-                #                                  proxies=Common.tunnel_proxies()).json()
-                #     except Exception as e:
-                #         Common.logger(log_type, crawler).error(f'pseries: {item_id}, failed to fetch series details: {e}')
-                #         continue
-                #     for video in p_res['data']:
-                #         item_id = video['item_id']
-                #         try:
-                #             video_dict = cls.get_video_info(log_type, crawler, item_id)
-                #             video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
-                #             video_dict['video_width'] = video_url_dict["video_width"]
-                #             video_dict['video_height'] = video_url_dict["video_height"]
-                #             video_dict['audio_url'] = video_url_dict["audio_url"]
-                #             video_dict['video_url'] = video_url_dict["video_url"]
-                #             video_dict['session'] = signature
-                #         except Exception as e:
-                #             Common.logger(log_type, crawler).error(f'video: {item_id}, failed to fetch video details: {e}')
-                #             continue
-                #         if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
-                #             Common.logger(log_type, crawler).info(
-                #                 f'gid:{video_dict["gid"]},视频已下载,无需重复下载\n')
-                #             continue
-                #         if not cls.is_ruled(log_type, crawler, video_dict, rule_dict):
-                #             Common.logger(log_type, crawler).error(f'video: {item_id}, fails the crawl rules\n')
-                #             continue
-                #         for k, v in video_dict.items():
-                #             Common.logger(log_type, crawler).info(f"{k}:{v}")
-                #         try:
-                #             # print(
-                #             #     f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
-                #             cls.download_publish(
-                #                 search_word=search_word,
-                #                 log_type=log_type,
-                #                 crawler=crawler,
-                #                 video_dict=video_dict,
-                #                 rule_dict=rule_dict,
-                #                 strategy=strategy,
-                #                 our_uid=our_uid,
-                #                 oss_endpoint=oss_endpoint,
-                #                 env=env,
-                #                 machine=machine
-                #             )
-                #             total_count += 1
-                #             if total_count >= 30:
-                #                 return
-                #             else:
-                #                 break
-                #         except Exception as e:
-                #             Common.logger(log_type, crawler).error(f'video: {item_id}, download_publish exception: {e}\n')
-
-            offset += 10
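-            # each results page appears to carry 10 items, hence the offset step;
-            # the loop exits once total_count reaches 30 or results pass the time cutoff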
-
-    @classmethod
-    def repeat_video(cls, log_type, crawler, video_id, env, machine):
-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
-        return len(repeat_video)
-
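-    # Note: the f-string SQL in repeat_video breaks if video_id ever contains a
-    # quote character. A parameterized sketch (hypothetical, assuming the underlying
-    # MySQL driver is reachable with %s placeholders, which MysqlHelper may not
-    # expose) would be:
-    #   sql = "select 1 from crawler_video where platform=%s and out_video_id=%s;"
-    #   cursor.execute(sql, (cls.platform, video_id))
-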
-    # Download / upload
-    @classmethod
-    def download_publish(cls, log_type, crawler, search_word, strategy, video_dict, rule_dict, our_uid, oss_endpoint,
-                         env, machine):
-
-        # Download the video
-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
-                               title=video_dict['video_title'], url=video_dict['video_url'])
-        # Download the audio track
-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
-                               title=video_dict['video_title'], url=video_dict['audio_url'])
-        # Merge audio and video
-        Common.video_compose(log_type=log_type, crawler=crawler,
-                             video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
-        md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-        if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
-            # remove the video folder
-            shutil.rmtree(f"./{crawler}/videos/{md_title}")
-            Common.logger(log_type, crawler).info("Video size is 0; folder removed\n")
-            return
-        # ffmpeg_dict = Common.ffmpeg(log_type, crawler,
-        #                             f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
-        # if ffmpeg_dict is None or ffmpeg_dict['size'] == 0:
-        #     Common.logger(log_type, crawler).warning(f"下载的视频无效,已删除\n")
-        #     # 删除视频文件夹
-        #     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-        #     return
-        # Download the cover image
-        Common.download_method(log_type=log_type, crawler=crawler, text='cover',
-                               title=video_dict['video_title'], url=video_dict['cover_url'])
-        # Save video info to a txt file
-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-        # Upload the video
-        Common.logger(log_type, crawler).info("Uploading video...")
-        our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                  crawler=crawler,
-                                                  strategy=strategy,
-                                                  our_uid=our_uid,
-                                                  env=env,
-                                                  oss_endpoint=oss_endpoint)
-        if our_video_id is None:
-            # upload failed: remove the video folder and bail out
-            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-            return
-
-        if env == 'dev':
-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        else:
-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        Common.logger(log_type, crawler).info("Video upload finished")
-
-        # Write the video record to Feishu
-        Feishu.insert_columns(log_type, 'xigua', "BUNvGC", "ROWS", 1, 2)
-        upload_time = int(time.time())
-        values = [[
-            search_word,
-            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-            "关键词搜索",
-            video_dict['video_title'],
-            str(video_dict['video_id']),
-            our_video_link,
-            video_dict['gid'],
-            video_dict['play_cnt'],
-            video_dict['comment_cnt'],
-            video_dict['like_cnt'],
-            video_dict['share_cnt'],
-            video_dict['duration'],
-            str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
-            video_dict['publish_time_str'],
-            video_dict['user_name'],
-            video_dict['user_id'],
-            video_dict['avatar_url'],
-            video_dict['cover_url'],
-            video_dict['video_url'],
-            video_dict['audio_url']]]
-        time.sleep(1)
-        Feishu.update_values(log_type, 'xigua', "BUNvGC", "E2:Z2", values)
-        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
-
-        # Save video info to the database
-        insert_sql = f""" insert into crawler_video(video_id,
-                                user_id,
-                                out_user_id,
-                                platform,
-                                strategy,
-                                out_video_id,
-                                video_title,
-                                cover_url,
-                                video_url,
-                                duration,
-                                publish_time,
-                                play_cnt,
-                                crawler_rule,
-                                width,
-                                height)
-                                values({our_video_id},
-                                {our_uid},
-                                "{video_dict['user_id']}",
-                                "{cls.platform}",
-                                "搜索爬虫策略",
-                                "{video_dict['video_id']}",
-                                "{video_dict['video_title']}",
-                                "{video_dict['cover_url']}",
-                                "{video_dict['video_url']}",
-                                {int(video_dict['duration'])},
-                                "{video_dict['publish_time_str']}",
-                                {int(video_dict['play_cnt'])},
-                                '{json.dumps(rule_dict)}',
-                                {int(video_dict['video_width'])},
-                                {int(video_dict['video_height'])}) """
-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-        MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
-        Common.logger(log_type, crawler).info('Video info inserted into the database!\n')
-
-    @classmethod
-    def get_search_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
-        user_list = cls.get_user_list(log_type=log_type, crawler=crawler, sheetid="SSPNPW", env=env,
-                                      machine=machine)
-        for user in user_list:
-            try:
-                search_word = user["search_word"]
-                our_uid = user["our_uid"]
-                Common.logger(log_type, crawler).info(f"开始抓取 {search_word} 用户主页视频\n")
-                cls.get_videolist(log_type=log_type,
-                                  crawler=crawler,
-                                  strategy=strategy,
-                                  our_uid=our_uid,
-                                  search_word=search_word,
-                                  oss_endpoint=oss_endpoint,
-                                  env=env,
-                                  machine=machine)
-            except Exception as e:
-                Common.logger(log_type, crawler).error(f"get_search_videos:{e}\n")
-
-
-if __name__ == '__main__':
-    XiguaSearch.get_search_videos('search', 'xigua', 'xigua_search', 'out', 'dev', 'aliyun')
-

+ 0 - 904
xigua/xigua_search/xigua_search_new.py

@@ -1,904 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/17
-import base64
-import json
-import os
-import random
-import shutil
-import string
-import sys
-import time
-from hashlib import md5
-import requests
-import urllib3
-from requests.adapters import HTTPAdapter
-from selenium.webdriver import DesiredCapabilities
-from selenium.webdriver.chrome.service import Service
-from selenium import webdriver
-from selenium.webdriver.common.by import By
-sys.path.append(os.getcwd())
-from common.scheduling_db import MysqlHelper
-from common.getuser import getUser
-from common.common import Common
-from common.feishu import Feishu
-from common.publish import Publish
-from common.public import get_config_from_mysql
-from common.userAgent import get_random_user_agent
-
-
-class XiguaSearchNew:
-    # number of videos scraped
-    i = 0
-    # number of videos downloaded
-    videos_cnt = 0
-    platform = "西瓜视频"
-    tag = "西瓜视频爬虫,搜索爬虫策略"
-
-    @classmethod
-    def get_rule_dict(cls, log_type, crawler):
-        while True:
-            rule_sheet = Feishu.get_values_batch(log_type, crawler, "shxOl7")
-            if rule_sheet is None:
-                Common.logger(log_type, crawler).info(f"get_rule:{rule_sheet},2秒钟后重试")
-                time.sleep(2)
-                continue
-            rule_dict = {
-                "play_cnt": int(rule_sheet[1][2]),
-                "duration_min": int(rule_sheet[2][2]),
-                "duration_max": int(rule_sheet[3][2]),
-                "publish_time": int(rule_sheet[4][2]),
-                "like_cnt": int(rule_sheet[5][2]),
-                "comment_cnt": int(rule_sheet[6][2])
-            }
-            return rule_dict
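-
-    # Shape of the resulting dict (cell values illustrative):
-    #   {"play_cnt": 1000, "duration_min": 60, "duration_max": 600,
-    #    "publish_time": 7, "like_cnt": 100, "comment_cnt": 10}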
-
-    # Download rules
-    @classmethod
-    def download_rule(cls, log_type, crawler, video_dict, rule_dict):
-        Common.logger(log_type, crawler).info(f'play_cnt: {video_dict["play_cnt"]} >= {rule_dict["play_cnt"]}')
-        Common.logger(log_type, crawler).info(f'duration: {rule_dict["duration_max"]} >= {video_dict["duration"]} >= {rule_dict["duration_min"]}')
-        Common.logger(log_type, crawler).info(f'publish_time: {int(time.time())} - {video_dict["publish_time_stamp"]} = {int(time.time())-video_dict["publish_time_stamp"]} <= {rule_dict["publish_time"] * 3600 * 24}')
-        Common.logger(log_type, crawler).info(f'like_cnt: {video_dict["like_cnt"]} >= {rule_dict["like_cnt"]}')
-        Common.logger(log_type, crawler).info(f'comment_cnt: {video_dict["comment_cnt"]} >= {rule_dict["comment_cnt"]}')
-        if video_dict["play_cnt"] >= rule_dict["play_cnt"] \
-            and rule_dict["duration_max"] >= video_dict["duration"] >= rule_dict["duration_min"] \
-            and int(time.time()) - video_dict["publish_time_stamp"] <= rule_dict["publish_time"]*3600*24 \
-            and video_dict["like_cnt"] >= rule_dict["like_cnt"] \
-            and video_dict["comment_cnt"] >= rule_dict["comment_cnt"]:
-            return True
-        else:
-            return False
-
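-    # Worked check (made-up numbers): under the rule dict sketched above, a video
-    # with 5000 plays, 120s duration, published 2 days ago, 300 likes and 20
-    # comments satisfies every condition, so download_rule returns True.
-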
-    # Filter-word list
-    @classmethod
-    def filter_words(cls, log_type, crawler, env):
-        filter_words_list = get_config_from_mysql(log_type, crawler, env, "filter")
-        return filter_words_list
-
-    # Get user info as dicts. Note: some user_id values are int, others str
-    @classmethod
-    def get_user_list(cls, log_type, crawler, sheetid, env):
-        try:
-            while True:
-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
-                if user_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"user_sheet: {user_sheet}, retrying in 10 seconds")
-                    time.sleep(10)
-                    continue
-                our_user_list = []
-                for i in range(1, len(user_sheet)):
-                    our_uid = user_sheet[i][6]
-                    search_word = user_sheet[i][4]
-                    tag1 = user_sheet[i][8]
-                    tag2 = user_sheet[i][9]
-                    tag3 = user_sheet[i][10]
-                    tag4 = user_sheet[i][11]
-                    tag5 = user_sheet[i][12]
-                    tag6 = user_sheet[i][13]
-                    tag7 = user_sheet[i][14]
-                    Common.logger(log_type, crawler).info(f"正在更新 {search_word} 关键词信息\n")
-                    if our_uid is None:
-                        default_user = getUser.get_default_user()
-                        # info used to create the our_uid
-                        user_dict = {
-                            'recommendStatus': -6,
-                            'appRecommendStatus': -6,
-                            'nickName': default_user['nickName'],
-                            'avatarUrl': default_user['avatarUrl'],
-                            'tagName': f'{tag1},{tag2},{tag3},{tag4},{tag5},{tag6},{tag7}',
-                        }
-                        our_uid = getUser.create_uid(log_type, crawler, user_dict, env)
-                        Common.logger(log_type, crawler).info(f'Newly created on-platform UID: {our_uid}')
-                        if env == 'prod':
-                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
-                        else:
-                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
-                        Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
-                                             [[our_uid, our_user_link]])
-                        Common.logger(log_type, crawler).info(f'On-platform user info written to Feishu!\n')
-                    our_user_dict = {
-                        'out_uid': '',
-                        'search_word': search_word,
-                        'our_uid': our_uid,
-                        'our_user_link': f'https://admin.piaoquantv.com/ums/user/{our_uid}/post',
-                    }
-                    our_user_list.append(our_user_dict)
-
-                return our_user_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_user_id_from_feishu exception: {e}\n')
-
-    @classmethod
-    def videos_cnt_rule(cls, log_type, crawler):
-        while True:
-            videos_cnt_sheet = Feishu.get_values_batch(log_type, crawler, "shxOl7")
-            if videos_cnt_sheet is None:
-                time.sleep(2)
-                continue
-            return int(videos_cnt_sheet[7][2])
-
-    @classmethod
-    def random_signature(cls):
-        src_digits = string.digits  # digits
-        src_uppercase = string.ascii_uppercase  # uppercase letters
-        src_lowercase = string.ascii_lowercase  # lowercase letters
-        digits_num = random.randint(1, 6)
-        uppercase_num = random.randint(1, 26 - digits_num - 1)
-        lowercase_num = 26 - (digits_num + uppercase_num)
-        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + random.sample(
-            src_lowercase, lowercase_num)
-        random.shuffle(password)
-        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
-        new_password_start = new_password[0:18]
-        new_password_end = new_password[-7:]
-        if new_password[18] == '8':
-            new_password = new_password_start + 'w' + new_password_end
-        elif new_password[18] == '9':
-            new_password = new_password_start + 'x' + new_password_end
-        elif new_password[18] == '-':
-            new_password = new_password_start + 'y' + new_password_end
-        elif new_password[18] == '.':
-            new_password = new_password_start + 'z' + new_password_end
-        else:
-            new_password = new_password_start + 'y' + new_password_end
-        return new_password
-
-    @classmethod
-    def get_video_url(cls, video_info):
-        """Resolve playable video/audio URLs from a videoResource payload.
-
-        Formats are tried in order dash_120fps -> dash -> normal. Within a
-        format, the highest available rendition (video_4 down to video_1) is
-        used; otherwise the separate dynamic video/audio tracks are used.
-        """
-        def decode_url(url):
-            # backup_url_1 is base64 that may arrive without padding;
-            # pad to a multiple of 4 before decoding.
-            return base64.b64decode(url + '=' * (-len(url) % 4)).decode('utf8')
-
-        empty = {"video_url": '', "audio_url": '', "video_width": 0, "video_height": 0}
-        video_resource = video_info.get('videoResource', {})
-        for fmt in ('dash_120fps', 'dash', 'normal'):
-            if fmt not in video_resource:
-                continue
-            video_list = video_resource[fmt].get('video_list', {})
-            # Highest quality first: video_4 is the best rendition.
-            for quality in ('video_4', 'video_3', 'video_2', 'video_1'):
-                if quality in video_list:
-                    url = decode_url(video_list[quality]['backup_url_1'])
-                    return {
-                        "video_url": url,
-                        "audio_url": url,  # the same rendition URL serves as the audio source
-                        "video_width": video_list[quality]['vwidth'],
-                        "video_height": video_list[quality]['vheight'],
-                    }
-            # Fall back to separate dynamic video / audio tracks.
-            dynamic = video_resource[fmt].get('dynamic_video', {})
-            video_tracks = dynamic.get('dynamic_video_list', [])
-            audio_tracks = dynamic.get('dynamic_audio_list', [])
-            if video_tracks and audio_tracks:
-                return {
-                    "video_url": decode_url(video_tracks[-1]['backup_url_1']),
-                    "audio_url": decode_url(audio_tracks[-1]['backup_url_1']),
-                    "video_width": video_tracks[-1]['vwidth'],
-                    "video_height": video_tracks[-1]['vheight'],
-                }
-            # Once a format key exists, do not fall through to the next format.
-            return empty
-        return empty
-
-    @classmethod
-    def get_comment_cnt(cls, item_id):
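-        # Fetch the comment count from ixigua's web comment API. The msToken,
-        # X-Bogus and cookie values below were captured from a browser session
-        # and may expire; _signature is randomized on every call.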
-        url = "https://www.ixigua.com/tlb/comment/article/v5/tab_comments/?"
-        params = {
-            "tab_index": "0",
-            "count": "10",
-            "offset": "10",
-            "group_id": str(item_id),
-            "item_id": str(item_id),
-            "aid": "1768",
-            "msToken": "50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==",
-            "X-Bogus": "DFSzswVOyGtANVeWtCLMqR/F6q9U",
-            "_signature": cls.random_signature(),
-        }
-        headers = {
-            'authority': 'www.ixigua.com',
-            'accept': 'application/json, text/plain, */*',
-            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
-            'cache-control': 'no-cache',
-            'cookie': 'MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; passport_csrf_token=72b2574f3c99f8ba670e42df430218fd; passport_csrf_token_default=72b2574f3c99f8ba670e42df430218fd; sid_guard=c7472b508ea631823ba765a60cf8757f%7C1680867422%7C3024002%7CFri%2C+12-May-2023+11%3A37%3A04+GMT; uid_tt=c13f47d51767f616befe32fb3e9f485a; uid_tt_ss=c13f47d51767f616befe32fb3e9f485a; sid_tt=c7472b508ea631823ba765a60cf8757f; sessionid=c7472b508ea631823ba765a60cf8757f; sessionid_ss=c7472b508ea631823ba765a60cf8757f; sid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; ssid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; odin_tt=b893608d4dde2e1e8df8cd5d97a0e2fbeafc4ca762ac72ebef6e6c97e2ed19859bb01d46b4190ddd6dd17d7f9678e1de; SEARCH_CARD_MODE=7168304743566296612_0; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; tt_scid=7Pux7s634-z8DYvCM20y7KigwH5u7Rh6D9C-RROpnT.aGMEcz6Vsxp.oai47wJqa4f86; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1683858689%7Ca5223fe1500578e01e138a0d71d6444692018296c4c24f5885af174a65873c95; ixigua-a-s=3; msToken=50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==; __ac_nonce=0645dcbf0005064517440; __ac_signature=_02B4Z6wo00f01FEGmAwAAIDBKchzCGqn-MBRJpyAAHAjieFC5GEg6gGiwz.I4PRrJl7f0GcixFrExKmgt6QI1i1S-dQyofPEj2ugWTCnmKUdJQv-wYuDofeKNe8VtMtZq2aKewyUGeKU-5Ud21; ixigua-a-s=3',
-            'pragma': 'no-cache',
-            'referer': f'https://www.ixigua.com/{item_id}?logTag=3c5aa86a8600b9ab8540',
-            'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
-            'sec-ch-ua-mobile': '?0',
-            'sec-ch-ua-platform': '"macOS"',
-            'sec-fetch-dest': 'empty',
-            'sec-fetch-mode': 'cors',
-            'sec-fetch-site': 'same-origin',
-            'tt-anti-token': 'cBITBHvmYjEygzv-f9c78c1297722cf1f559c74b084e4525ce4900bdcf9e8588f20cc7c2e3234422',
-            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35',
-            'x-secsdk-csrf-token': '000100000001f8e733cf37f0cd255a51aea9a81ff7bc0c09490cfe41ad827c3c5c18ec809279175e4d9f5553d8a5'
-        }
-        urllib3.disable_warnings()
-        s = requests.session()
-        # max_retries=3: retry failed requests up to 3 times
-        s.mount('http://', HTTPAdapter(max_retries=3))
-        s.mount('https://', HTTPAdapter(max_retries=3))
-        response = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
-        response.close()
-        if response.status_code != 200 or 'total_number' not in response.json():
-            return 0
-        return response.json().get("total_number", 0)
-
-    # Fetch video details
-    @classmethod
-    def get_video_info(cls, log_type, crawler, item_id):
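-        # Query ixigua's mixVideo information endpoint and normalize the
-        # response into a video_dict; returns None on failure. The msToken,
-        # X-Bogus, _signature and cookie values are hard-coded session data
-        # that may stop working once they expire.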
-        url = 'https://www.ixigua.com/api/mixVideo/information?'
-        headers = {
-            "accept-encoding": "gzip, deflate",
-            "accept-language": "zh-CN,zh-Hans;q=0.9",
-            "user-agent": get_random_user_agent('pc'),
-            "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
-        }
-        params = {
-            'mixId': str(item_id),
-            'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
-                       'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-            'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
-            '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
-                          'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
-        }
-        cookies = {
-            'ixigua-a-s': '1',
-            'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
-                       'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-            'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
-                     '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
-            'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
-            'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
-            '__ac_nonce': '06304878000964fdad287',
-            '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
-                              'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
-            'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
-            '_tea_utm_cache_1300': 'undefined',
-            'support_avif': 'false',
-            'support_webp': 'false',
-            'xiguavideopcwebid': '7134967546256016900',
-            'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
-        }
-        urllib3.disable_warnings()
-        s = requests.session()
-        # max_retries=3: retry failed requests up to 3 times
-        s.mount('http://', HTTPAdapter(max_retries=3))
-        s.mount('https://', HTTPAdapter(max_retries=3))
-        response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
-        response.close()
-        if response.status_code != 200 or 'data' not in response.json() or response.json()['data'] == {}:
-            Common.logger(log_type, crawler).warning(f"get_video_info:{response.status_code}, {response.text}\n")
-            return None
-        else:
-            video_info = response.json()['data'].get("gidInformation", {}).get("packerData", {}).get("video", {})
-            if video_info == {}:
-                return None
-            # Resolve the playable URLs once instead of re-decoding them per field
-            video_url_dict = cls.get_video_url(video_info)
-            video_dict = {
-                "video_title": video_info.get("title", ""),
-                "video_id": video_info.get("videoResource", {}).get("vid", ""),
-                "gid": str(item_id),
-                "play_cnt": int(video_info.get("video_watch_count", 0)),
-                "like_cnt": int(video_info.get("video_like_count", 0)),
-                "comment_cnt": int(cls.get_comment_cnt(item_id)),
-                "share_cnt": 0,
-                "favorite_cnt": 0,
-                "duration": int(video_info.get("video_duration", 0)),
-                "video_width": int(video_url_dict["video_width"]),
-                "video_height": int(video_url_dict["video_height"]),
-                "publish_time_stamp": int(video_info.get("video_publish_time", 0)),
-                "publish_time_str": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_info.get("video_publish_time", 0)))),
-                "user_name": video_info.get("user_info", {}).get("name", ""),
-                "user_id": str(video_info.get("user_info", {}).get("user_id", "")),
-                "avatar_url": str(video_info.get("user_info", {}).get("avatar_url", "")),
-                "cover_url": video_info.get("poster_url", ""),
-                "audio_url": video_url_dict["audio_url"],
-                "video_url": video_url_dict["video_url"],
-                "session": f"xigua-search-{int(time.time())}"
-            }
-            return video_dict
-
-    @classmethod
-    def get_videoList(cls, log_type, crawler, search_word, our_uid, env):
-        # Capture Chrome performance logs (network requests)
-        ca = DesiredCapabilities.CHROME
-        ca["goog:loggingPrefs"] = {"performance": "ALL"}
-        # Run Chrome headless (no visible browser window)
-        chrome_options = webdriver.ChromeOptions()
-        chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-        chrome_options.add_argument("--headless")
-        chrome_options.add_argument("--window-size=1920,1080")
-        chrome_options.add_argument("--no-sandbox")
-        if env == "dev":
-            chromedriver = "/Users/wangkun/Downloads/chromedriver/chromedriver_v112/chromedriver"
-        else:
-            chromedriver = "/usr/bin/chromedriver"
-        # Initialize the Chrome driver
-        driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(chromedriver))
-        driver.implicitly_wait(10)
-        Common.logger(log_type, crawler).info(f"Opening search page: {search_word}")
-        driver.get(f"https://www.ixigua.com/search/{search_word}/")
-        time.sleep(3)
-        # if len(driver.find_elements(By.XPATH, '//*[@class="xg-notification-close"]')) != 0:
-        #     driver.find_element(By.XPATH, '//*[@class="xg-notification-close"]').click()
-        # Common.logger(log_type, crawler).info("Click the filter button")
-        # driver.find_element(By.XPATH, '//*[@class="searchPageV2__header-icons-categories"]').click()
-        # time.sleep(1)
-        # Common.logger(log_type, crawler).info("Click sort-by-latest")
-        # driver.find_element(By.XPATH, '//*[@class="searchPageV2-category__wrapper"]/*[2]/*[1]').click()
-        # time.sleep(5)
-
-        index = 0
-        while True:
-            video_elements = driver.find_elements(By.XPATH, '//*[@class="HorizontalFeedCard searchPageV2__card"]')
-            video_element_temp = video_elements[index:]
-            if len(video_element_temp) == 0:
-                Common.logger(log_type, crawler).info('Reached the end of the results\n')
-                cls.i = 0
-                cls.videos_cnt = 0
-                driver.quit()
-                return
-            for i, video_element in enumerate(video_element_temp):
-                try:
-                    if cls.videos_cnt >= cls.videos_cnt_rule(log_type, crawler):
-                        Common.logger(log_type, crawler).info(f"Search word: {search_word}, downloaded videos: {cls.videos_cnt}\n")
-                        cls.i = 0
-                        cls.videos_cnt = 0
-                        driver.quit()
-                        return
-                    # Common.logger(log_type, crawler).info(f"i:{i}, video_element:{video_element}")
-                    if video_element is None:
-                        Common.logger(log_type, crawler).info('Reached the end\n')
-                        cls.i = 0
-                        cls.videos_cnt = 0
-                        driver.quit()
-                        return
-                    cls.i += 1
-                    Common.logger(log_type, crawler).info(f'Scrolling video #{cls.i} of the list to the middle of the screen')
-                    # Common.logger(log_type, crawler).info(f"video_elements:{len(video_elements)}")
-                    # Common.logger(log_type, crawler).info(f"index+i:{index+i}")
-                    driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})", video_element)
-                    time.sleep(1)
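-                    # An XPath starting with // searches the whole document, not
-                    # just this element, so the absolute index (index + i) is used.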
-                    item_id = video_element.find_elements(By.XPATH, '//*[@class="HorizontalFeedCard__coverWrapper disableZoomAnimation"]')[index+i].get_attribute('href')
-                    item_id = item_id.split("com/")[-1].split("?&")[0]
-                    video_dict = cls.get_video_info(log_type, crawler, item_id)
-                    if video_dict is None:
-                        Common.logger(log_type, crawler).info("Invalid video")
-                    else:
-                        for k, v in video_dict.items():
-                            Common.logger(log_type, crawler).info(f"{k}:{v}")
-                        rule_dict = cls.get_rule_dict(log_type, crawler)
-                        # if int((int(time.time()) - int(video_dict["publish_time_stamp"])) / (3600 * 24)) > int(rule_dict["publish_time"]):
-                        #     Common.logger(log_type, crawler).info(f'Published more than {int(rule_dict["publish_time"])} days ago\n')
-                        #     driver.quit()
-                        #     return
-                        if cls.download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
-                            Common.logger(log_type, crawler).info("Does not meet the download rules\n")
-                        elif any(str(word) in video_dict["video_title"] for word in cls.filter_words(log_type, crawler, env)):
-                            Common.logger(log_type, crawler).info("Title contains a filtered word\n")
-                        elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
-                            Common.logger(log_type, crawler).info("Video already downloaded\n")
-                        else:
-                            cls.download_publish(log_type=log_type,
-                                                 crawler=crawler,
-                                                 search_word=search_word,
-                                                 video_dict=video_dict,
-                                                 rule_dict=rule_dict,
-                                                 our_uid=our_uid,
-                                                 env=env)
-                except Exception as e:
-                    Common.logger(log_type, crawler).warning(f"Exception while crawling a single video: {e}\n")
-
-            Common.logger(log_type, crawler).info('Finished one batch of videos; sleeping for 10 seconds\n')
-            time.sleep(10)
-            index = index + len(video_element_temp)
-
-    @classmethod
-    def repeat_video(cls, log_type, crawler, video_id, env):
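-        # Count existing crawler_video rows for this out_video_id;
-        # a non-zero result means the video was already downloaded.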
-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
-        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, action="")
-        return len(repeat_video)
-
-    # Download / upload
-    @classmethod
-    def download_publish(cls, log_type, crawler, search_word, video_dict, rule_dict, our_uid, env):
-
-        # Download the video
-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
-                               title=video_dict['video_title'], url=video_dict['video_url'])
-        # Download the audio track
-        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
-                               title=video_dict['video_title'], url=video_dict['audio_url'])
-        # Mux audio and video into a single file
-        Common.video_compose(log_type=log_type, crawler=crawler,
-                             video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
-        md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-        try:
-            if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
-                # Remove the video folder
-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                Common.logger(log_type, crawler).info("Video size is 0; folder removed\n")
-                return
-        except FileNotFoundError:
-            # Remove the video folder
-            shutil.rmtree(f"./{crawler}/videos/{md_title}")
-            Common.logger(log_type, crawler).info("Video file missing; folder removed\n")
-            return
-        # Download the cover image
-        Common.download_method(log_type=log_type, crawler=crawler, text='cover',
-                               title=video_dict['video_title'], url=video_dict['cover_url'])
-        # Save video metadata to a txt file
-        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-        # Upload the video
-        Common.logger(log_type, crawler).info("Uploading video...")
-        if env == "dev":
-            oss_endpoint = "out"
-        else:
-            oss_endpoint = "inner"
-        our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                  crawler=crawler,
-                                                  strategy="搜索爬虫策略",
-                                                  our_uid=our_uid,
-                                                  env=env,
-                                                  oss_endpoint=oss_endpoint)
-        if env == 'dev':
-            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        else:
-            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-        Common.logger(log_type, crawler).info("Video upload finished")
-
-        if our_video_id is None:
-            try:
-                # Upload failed: remove the local video folder
-                shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                return
-            except FileNotFoundError:
-                return
-
-        # Save the video info to the database
-        insert_sql = f""" insert into crawler_video(video_id,
-                                user_id,
-                                out_user_id,
-                                platform,
-                                strategy,
-                                out_video_id,
-                                video_title,
-                                cover_url,
-                                video_url,
-                                duration,
-                                publish_time,
-                                play_cnt,
-                                crawler_rule,
-                                width,
-                                height)
-                                values({our_video_id},
-                                {our_uid},
-                                "{video_dict['user_id']}",
-                                "{cls.platform}",
-                                "搜索爬虫策略",
-                                "{video_dict['video_id']}",
-                                "{video_dict['video_title']}",
-                                "{video_dict['cover_url']}",
-                                "{video_dict['video_url']}",
-                                {int(video_dict['duration'])},
-                                "{video_dict['publish_time_str']}",
-                                {int(video_dict['play_cnt'])},
-                                '{json.dumps(rule_dict)}',
-                                {int(video_dict['video_width'])},
-                                {int(video_dict['video_height'])}) """
-        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
-        Common.logger(log_type, crawler).info("Video info written to the database")
-
-        # Write the video info to Feishu: insert a new row at position 2,
-        # then fill columns E2:Z2 of that row
-        Feishu.insert_columns(log_type, crawler, "BUNvGC", "ROWS", 1, 2)
-        values = [[
-            search_word,
-            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
-            "关键词搜索",
-            video_dict['video_title'],
-            str(video_dict['video_id']),
-            our_video_link,
-            video_dict['gid'],
-            video_dict['play_cnt'],
-            video_dict['comment_cnt'],
-            video_dict['like_cnt'],
-            video_dict['share_cnt'],
-            video_dict['duration'],
-            str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
-            video_dict['publish_time_str'],
-            video_dict['user_name'],
-            video_dict['user_id'],
-            video_dict['avatar_url'],
-            video_dict['cover_url'],
-            video_dict['video_url'],
-            video_dict['audio_url']]]
-        time.sleep(0.5)
-        Feishu.update_values(log_type, crawler, "BUNvGC", "E2:Z2", values)
-        Common.logger(log_type, crawler).info('Video info written to Feishu\n')
-        cls.videos_cnt += 1
-
-    @classmethod
-    def get_search_videos(cls, log_type, crawler, env):
-        user_list = cls.get_user_list(log_type=log_type, crawler=crawler, sheetid="SSPNPW", env=env)
-        for user in user_list:
-            try:
-                cls.i = 0
-                cls.videos_cnt = 0
-                search_word = user["search_word"]
-                our_uid = user["our_uid"]
-                Common.logger(log_type, crawler).info(f"Start crawling videos for search word: {search_word}\n")
-                cls.get_videoList(log_type=log_type,
-                                  crawler=crawler,
-                                  search_word=search_word,
-                                  our_uid=our_uid,
-                                  env=env)
-            except Exception as e:
-                Common.logger(log_type, crawler).error(f"get_search_videos:{e}\n")
-
-
-if __name__ == '__main__':
-    # XiguaSearch.get_search_videos('search', 'xigua', 'dev')
-    # XiguaSearch.get_videoList("search", "xigua", "长寿食物", "dev")
-    # XiguaSearch.get_video_info("search", "xigua", "7027495456829768196")
-    # print(XiguaSearch.get_comment_cnt("7027495456829768196"))
-    # print(XiguaSearch.videos_cnt_rule("search", "xigua"))
-    # XiguaSearch.filter_words('search', 'xigua', 'dev')
-    # print(XiguaSearchNew.get_rule_dict('search', 'xigua'))
-    # os.system("ps aux | grep Chrome | grep -v grep | awk '{print $2}' | xargs kill -9")
-    pass
-

Some files were not shown because too many files were changed in this diff