wangkun, 2 years ago
parent
commit
c9641db114
44 changed files with 3831 additions and 1984 deletions
  1. +7 -1      README.MD
  2. +4 -0      common/feishu.py
  3. +12 -0     common/publish.py
  4. +150 -147  ganggangdouchuan/ganggangdouchuan_recommend/ganggangdouchuan_recommend.py
  5. BIN        ganggangdouchuan/videos/.DS_Store
  6. +406 -406  gongzhonghao/gongzhonghao_follow/gongzhonghao_follow.py
  7. +406 -406  gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_2.py
  8. +406 -406  gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_3.py
  9. +16 -16    gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py
 10. +16 -16    gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py
 11. BIN        jixiangxingfu/.DS_Store
 12. +3 -0      jixiangxingfu/__init__.py
 13. +3 -0      jixiangxingfu/jixiangxingfu_main/__init__.py
 14. +25 -0     jixiangxingfu/jixiangxingfu_main/run_jixiangxingfu_recommend.py
 15. BIN        jixiangxingfu/jixiangxingfu_recommend/.DS_Store
 16. +3 -0      jixiangxingfu/jixiangxingfu_recommend/__init__.py
 17. +161 -0    jixiangxingfu/jixiangxingfu_recommend/insert.py
 18. +403 -0    jixiangxingfu/jixiangxingfu_recommend/jixiangxingfu_recommend.py
 19. +3 -0      jixiangxingfu/logs/__init__.py
 20. BIN        jixiangxingfu/videos/.DS_Store
 21. +63 -45    main/process_offline.sh
 22. +335 -335  xiaoniangao/xiaoniangao_follow/xiaoniangao_follow.py
 23. +59 -59    xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py
 24. +147 -147  xiaoniangao/xiaoniangao_play/xiaoniangao_play.py
 25. BIN        zhiqingtiantiankan/.DS_Store
 26. +3 -0      zhiqingtiantiankan/__init__.py
 27. BIN        zhiqingtiantiankan/logs/.DS_Store
 28. +3 -0      zhiqingtiantiankan/logs/__init__.py
 29. +3 -0      zhiqingtiantiankan/zhiqingtiantiankan_main/__init__.py
 30. +25 -0     zhiqingtiantiankan/zhiqingtiantiankan_main/run_zhiqingtiantiankan_recommend.py
 31. BIN        zhiqingtiantiankan/zhiqingtiantiankan_recommend/.DS_Store
 32. +3 -0      zhiqingtiantiankan/zhiqingtiantiankan_recommend/__init__.py
 33. +158 -0    zhiqingtiantiankan/zhiqingtiantiankan_recommend/zhiqing_insert.py
 34. +410 -0    zhiqingtiantiankan/zhiqingtiantiankan_recommend/zhiqingtiantiankan_recommend.py
 35. BIN        zhongmiaoyinxin/.DS_Store
 36. +3 -0      zhongmiaoyinxin/__init__.py
 37. BIN        zhongmiaoyinxin/logs/.DS_Store
 38. +3 -0      zhongmiaoyinxin/logs/__init__.py
 39. +3 -0      zhongmiaoyinxin/zhongmiaoyinxin_main/__init__.py
 40. +25 -0     zhongmiaoyinxin/zhongmiaoyinxin_main/run_zhongmiaoyinxin_recommend.py
 41. BIN        zhongmiaoyinxin/zhongmiaoyinxin_recommend/.DS_Store
 42. +3 -0      zhongmiaoyinxin/zhongmiaoyinxin_recommend/__init__.py
 43. +158 -0    zhongmiaoyinxin/zhongmiaoyinxin_recommend/insert.py
 44. +403 -0    zhongmiaoyinxin/zhongmiaoyinxin_recommend/zhongmiaoyinxin_recommend.py

+ 7 - 1  README.MD

@@ -197,7 +197,7 @@ ps aux | grep run_suisuiniannianyingfuqi
 ps aux | grep run_suisuiniannianyingfuqi | grep -v grep | awk '{print $2}' | xargs kill -9
 ```
 
-#### 刚刚都传小程序
+#### 线下爬虫: 刚刚都传 / 吉祥幸福 / 知青天天看 / 众妙音信
 ```commandline
 MacAir 设备, crontab定时任务
 * * * * * /bin/sh /Users/piaoquan/Desktop/piaoquan_crawler/main/process_offline.sh "prod"
@@ -205,5 +205,11 @@ MacAir 设备, crontab定时任务
 sh /Users/wangkun/Desktop/crawler/piaoquan_crawler/main/process_offline.sh "dev"
 检测进程
 ps aux | grep run_ganggangdouchuan
+ps aux | grep run_jixiangxingfu
+ps aux | grep run_zhongmiaoyinxin
+ps aux | grep run_zhiqingtiantiankan
 ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
+ps aux | grep run_zhiqingtiantiankan | grep -v grep | awk '{print $2}' | xargs kill -9
 ```
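
The repeated `ps aux | grep ... | grep -v grep | awk '{print $2}' | xargs kill -9` pipelines above check for and hard-kill each offline crawler's `run_*` process. Below is a minimal Python sketch of the same check-and-kill step, assuming `pgrep` is available (as it is on the MacAir host mentioned above); `CRAWLERS` and `kill_crawler` are hypothetical names for illustration, not part of `main/process_offline.sh`.

```python
import os
import signal
import subprocess

# Crawler names taken from the README section above; the helper itself is illustrative.
CRAWLERS = ["ganggangdouchuan", "jixiangxingfu", "zhongmiaoyinxin", "zhiqingtiantiankan"]

def kill_crawler(name: str) -> None:
    """Find processes whose command line contains run_<name> and send SIGKILL (kill -9)."""
    result = subprocess.run(["pgrep", "-f", f"run_{name}"], capture_output=True, text=True)
    for pid in result.stdout.split():
        try:
            os.kill(int(pid), signal.SIGKILL)
        except ProcessLookupError:
            pass  # process exited between pgrep and kill

if __name__ == "__main__":
    for crawler in CRAWLERS:
        kill_crawler(crawler)
```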

+ 4 - 0  common/feishu.py

@@ -61,6 +61,8 @@ class Feishu:
     crawler_shengshengyingyin = 'https://w42nne6hzg.feishu.cn/sheets/shtcnz1ymxHL1u8WHblfqfys7qe'
     # 刚刚都传
     crawler_ganggangdouchuan = 'https://w42nne6hzg.feishu.cn/sheets/shtcnTuJgeZU2bc7VaesAqk3QJx'
+    # 知青天天看
+    crawler_zhiqingtiantiankan = 'https://w42nne6hzg.feishu.cn/sheets/shtcnjmhKdJOKdqnEzJcZb5xaHc?'
     # 公众号_信欣
     crawler_gongzhonghao = 'https://w42nne6hzg.feishu.cn/sheets/shtcna98M2mX7TbivTj9Sb7WKBN?'
     # YouTube
@@ -128,6 +130,8 @@ class Feishu:
             return 'shtcna98M2mX7TbivTj9Sb7WKBN'
         elif crawler == 'douyin':
             return 'shtcnhq63MoXOpqbkuLuoapYIAh'
+        elif crawler == 'zhiqingtiantiankan':
+            return 'shtcnjmhKdJOKdqnEzJcZb5xaHc'
 
     # 获取飞书api token
     @classmethod
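
The new `elif` branch above extends the crawler-name to spreadsheet-token mapping (the token is the path segment of the Feishu sheet URL, e.g. `shtcnjmhKdJOKdqnEzJcZb5xaHc` for zhiqingtiantiankan). A dict-based sketch of that lookup follows, using only tokens visible in this diff; `SHEET_TOKENS` and `spreadsheet_id` are illustrative names, not the actual members of `common/feishu.py`.

```python
from typing import Optional

# Sheet tokens that appear in this diff; the dict-based layout is a sketch,
# the real common/feishu.py uses class attributes plus an if/elif chain.
SHEET_TOKENS = {
    "gongzhonghao": "shtcna98M2mX7TbivTj9Sb7WKBN",
    "douyin": "shtcnhq63MoXOpqbkuLuoapYIAh",
    "zhiqingtiantiankan": "shtcnjmhKdJOKdqnEzJcZb5xaHc",  # added in this commit
}

def spreadsheet_id(crawler: str) -> Optional[str]:
    """Return the Feishu sheet token for a crawler, or None if it is unknown."""
    return SHEET_TOKENS.get(crawler)
```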

+ 12 - 0  common/publish.py

@@ -208,6 +208,18 @@ class Publish:
             uids_prod_ganggangdouchuan_recommend = [26117661, 26117662, 26117663]
             return random.choice(uids_prod_ganggangdouchuan_recommend)
 
+        elif crawler == 'jixiangxingfu' and env == 'prod' and strategy == '推荐榜爬虫策略':
+            uids_prod_jixiangxingfu_recommend = [26117478, 26117479, 26117480, 26117471, 26117473, 26117474, 26117475, 26117476, 26117477]
+            return random.choice(uids_prod_jixiangxingfu_recommend)
+
+        elif crawler == 'zhongmiaoyinxin' and env == 'prod' and strategy == '推荐榜爬虫策略':
+            uids_prod_zhongmiaoyinxin_recommend = [26117493, 26117494, 26117495, 26117496, 26117497, 26117498]
+            return random.choice(uids_prod_zhongmiaoyinxin_recommend)
+
+        elif crawler == 'zhiqingtiantiankan' and env == 'prod' and strategy == '推荐榜爬虫策略':
+            uids_prod_zhiqingtiantiankan_recommend = [20631253, 20631254, 20631255, 20631256, 20631257, 20631258, 20631259, 20631260, 20631261]
+            return random.choice(uids_prod_zhiqingtiantiankan_recommend)
+
         else:
             return our_uid
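
Each new branch above picks one production uid at random from a fixed pool for that crawler's 推荐榜爬虫策略 (recommend-feed strategy), and any other case falls through to `our_uid`. A table-driven sketch of the same selection; the uid lists are copied from this diff, while `UID_POOLS` and `choose_uid` are illustrative names rather than the actual code in `common/publish.py`.

```python
import random

# uid pools copied from the elif branches above; the helper is illustrative.
UID_POOLS = {
    "jixiangxingfu": [26117478, 26117479, 26117480, 26117471, 26117473,
                      26117474, 26117475, 26117476, 26117477],
    "zhongmiaoyinxin": [26117493, 26117494, 26117495, 26117496, 26117497, 26117498],
    "zhiqingtiantiankan": [20631253, 20631254, 20631255, 20631256, 20631257,
                           20631258, 20631259, 20631260, 20631261],
}

def choose_uid(crawler: str, env: str, strategy: str, our_uid):
    """Pick a random production uid for recommend crawlers; otherwise keep our_uid."""
    if env == "prod" and strategy == "推荐榜爬虫策略" and crawler in UID_POOLS:
        return random.choice(UID_POOLS[crawler])
    return our_uid
```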
 

+ 150 - 147  ganggangdouchuan/ganggangdouchuan_recommend/ganggangdouchuan_recommend.py

@@ -123,7 +123,8 @@ class GanggangdouchuanRecommend:
                 time.sleep(1)
                 try:
                     elements = driver.find_elements(By.XPATH, xpath)
-                    return elements
+                    if elements:
+                        return elements
                 except NoSuchElementException:
                     pass
         except Exception as e:
@@ -169,167 +170,169 @@ class GanggangdouchuanRecommend:
         index = 0
 
         while True:
-            if cls.search_elements(log_type, crawler, driver, '//wx-view[@class="lists"]') is None:
-                Common.logger(log_type, crawler).info('窗口已销毁\n')
-                return
-
-            Common.logger(log_type, crawler).info('获取视频列表\n')
-            video_elements = cls.search_elements(log_type, crawler, driver, '//wx-view[@class="list"]')
-            if video_elements is None:
-                Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
-                return
+            try:
+                if cls.search_elements(log_type, crawler, driver, '//wx-view[@class="lists"]') is None:
+                    Common.logger(log_type, crawler).info('窗口已销毁\n')
+                    return
 
-            video_element_temp = video_elements[index:]
-            if len(video_element_temp) == 0:
-                Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
-                return
+                Common.logger(log_type, crawler).info('获取视频列表\n')
+                video_elements = cls.search_elements(log_type, crawler, driver, '//wx-view[@class="list"]')
+                if video_elements is None:
+                    Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
+                    return
 
-            for i, video_element in enumerate(video_element_temp):
-                if video_element is None:
-                    Common.logger(log_type, crawler).info('到底啦~\n')
+                video_element_temp = video_elements[index:]
+                if len(video_element_temp) == 0:
+                    Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
                     return
-                cls.i += 1
-                cls.search_elements(log_type, crawler, driver, '//wx-view[@class="list"]')
-                Common.logger(log_type, crawler).info(f'拖动"视频"列表第{cls.i}个至屏幕中间')
-                time.sleep(3)
-                driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})", video_element)
 
-                # video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="title"]//span[2]')[cls.i - 1].get_attribute('innerHTML')
-                video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="title"]//span[2]')[cls.i - 1].text
-                cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="poster-img"]')[cls.i - 1].get_attribute('src')
-                out_video_id = md5(video_title.encode('utf8')).hexdigest()
-                video_dict = {
-                    'video_title': video_title,
-                    'video_id': out_video_id,
-                    'play_cnt': 0,
-                    'comment_cnt': 0,
-                    'like_cnt': 0,
-                    'share_cnt': 0,
-                    'publish_time_stamp': int(time.time()),
-                    'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
-                    'user_name': "ganggangdouchuan",
-                    'user_id': "ganggangdouchuan",
-                    'avatar_url': cover_url,
-                    'cover_url': cover_url,
-                    'session': f"ganggangdouchuan-{int(time.time())}"
-                }
-                for k, v in video_dict.items():
-                    Common.logger(log_type, crawler).info(f"{k}:{v}")
+                for i, video_element in enumerate(video_element_temp):
+                    if video_element is None:
+                        Common.logger(log_type, crawler).info('到底啦~\n')
+                        return
+                    cls.i += 1
+                    cls.search_elements(log_type, crawler, driver, '//wx-view[@class="list"]')
+                    Common.logger(log_type, crawler).info(f'拖动"视频"列表第{cls.i}个至屏幕中间')
+                    time.sleep(3)
+                    driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})", video_element)
+
+                    # video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="title"]//span[2]')[cls.i - 1].text
+                    # cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="poster-img"]')[cls.i - 1].get_attribute('src')
+
+                    video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="title"]//span[2]')[index+i].text
+                    cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="poster-img"]')[index+i].get_attribute('src')
+
+                    out_video_id = md5(video_title.encode('utf8')).hexdigest()
+                    video_dict = {
+                        'video_title': video_title,
+                        'video_id': out_video_id,
+                        'play_cnt': 0,
+                        'comment_cnt': 0,
+                        'like_cnt': 0,
+                        'share_cnt': 0,
+                        'publish_time_stamp': int(time.time()),
+                        'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
+                        'user_name': "ganggangdouchuan",
+                        'user_id': "ganggangdouchuan",
+                        'avatar_url': cover_url,
+                        'cover_url': cover_url,
+                        'session': f"ganggangdouchuan-{int(time.time())}"
+                    }
+                    for k, v in video_dict.items():
+                        Common.logger(log_type, crawler).info(f"{k}:{v}")
 
-                if video_title is None or cover_url is None:
-                    Common.logger(log_type, crawler).info("无效视频\n")
-                elif cls.repeat_out_video_id(log_type, crawler, out_video_id, env) != 0:
-                    Common.logger(log_type, crawler).info('视频已下载\n')
-                else:
-                    video_url = cls.get_video_url(log_type, crawler, driver, video_element)
-                    if video_url is None:
-                        Common.logger(log_type, crawler).info("未获取到视频播放地址\n")
-                        driver.press_keycode(AndroidKey.BACK)
-                    elif cls.repeat_video_url(log_type, crawler, video_url, env) != 0:
+                    if video_title is None or cover_url is None:
+                        Common.logger(log_type, crawler).info("无效视频\n")
+                    elif cls.repeat_out_video_id(log_type, crawler, out_video_id, env) != 0:
                         Common.logger(log_type, crawler).info('视频已下载\n')
-                        driver.press_keycode(AndroidKey.BACK)
                     else:
-                        video_dict["video_url"]=video_url
-                        cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env, driver)
-            Common.logger(log_type, crawler).info('已抓取完一组视频,休眠10秒\n')
-            time.sleep(10)
-            index = index + len(video_element_temp)
+                        video_url = cls.get_video_url(log_type, crawler, driver, video_element)
+                        if video_url is None:
+                            Common.logger(log_type, crawler).info("未获取到视频播放地址\n")
+                            driver.press_keycode(AndroidKey.BACK)
+                        elif cls.repeat_video_url(log_type, crawler, video_url, env) != 0:
+                            Common.logger(log_type, crawler).info('视频已下载\n')
+                            driver.press_keycode(AndroidKey.BACK)
+                        else:
+                            video_dict["video_url"]=video_url
+                            cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env, driver)
+                Common.logger(log_type, crawler).info('已抓取完一组视频,休眠10秒\n')
+                time.sleep(10)
+                index = index + len(video_element_temp)
 
-        # except Exception as e:
-        #     Common.logger(log_type, crawler).error(f'get_recommend异常,重启APP:{e}\n')
+            except Exception as e:
+                Common.logger(log_type, crawler).error(f'get_videoList异常:{e}\n')
+                cls.i = 0
 
     @classmethod
     def download_publish(cls, log_type, crawler, video_dict, oss_endpoint, env, driver: WebDriver):
-        try:
-            # 下载视频
-            Common.download_method(log_type=log_type, crawler=crawler, text='video', title=video_dict['video_title'], url=video_dict['video_url'])
-            ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
-            if ffmpeg_dict is None:
-                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-                shutil.rmtree(f"./{crawler}/videos/{md_title}/")
-                Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-                return
-            video_dict["duration"] = ffmpeg_dict["duration"]
-            video_dict["video_width"] = ffmpeg_dict["width"]
-            video_dict["video_height"] = ffmpeg_dict["height"]
-            # 下载封面
-            Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'],
-                                   url=video_dict['cover_url'])
-            # 保存视频信息至txt
-            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
+        # 下载视频
+        Common.download_method(log_type=log_type, crawler=crawler, text='video', title=video_dict['video_title'], url=video_dict['video_url'])
+        ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
+        if ffmpeg_dict is None:
+            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
+            shutil.rmtree(f"./{crawler}/videos/{md_title}/")
+            Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+            return
+        video_dict["duration"] = ffmpeg_dict["duration"]
+        video_dict["video_width"] = ffmpeg_dict["width"]
+        video_dict["video_height"] = ffmpeg_dict["height"]
+        # 下载封面
+        Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'],
+                               url=video_dict['cover_url'])
+        # 保存视频信息至txt
+        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
 
-            # 上传视频
-            Common.logger(log_type, crawler).info("开始上传视频...")
-            our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                      crawler=crawler,
-                                                      strategy="推荐榜爬虫策略",
-                                                      our_uid="recommend",
-                                                      env=env,
-                                                      oss_endpoint=oss_endpoint)
-            if env == 'dev':
-                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-            else:
-                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-            Common.logger(log_type, crawler).info("视频上传完成")
+        # 上传视频
+        Common.logger(log_type, crawler).info("开始上传视频...")
+        our_video_id = Publish.upload_and_publish(log_type=log_type,
+                                                  crawler=crawler,
+                                                  strategy="推荐榜爬虫策略",
+                                                  our_uid="recommend",
+                                                  env=env,
+                                                  oss_endpoint=oss_endpoint)
+        if env == 'dev':
+            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        else:
+            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        Common.logger(log_type, crawler).info("视频上传完成")
 
-            if our_video_id is None:
-                # 删除视频文件夹
-                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-                return
+        if our_video_id is None:
+            # 删除视频文件夹
+            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
+            return
 
-            # 视频信息保存至飞书
-            Feishu.insert_columns(log_type, crawler, "070a67", "ROWS", 1, 2)
-            # 视频ID工作表,首行写入数据
-            upload_time = int(time.time())
-            values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                       "推荐榜爬虫策略",
-                       video_dict["video_title"],
-                       video_dict["video_id"],
-                       video_dict["duration"],
-                       f'{video_dict["video_width"]}*{video_dict["video_height"]}',
-                       our_video_link,
-                       video_dict["cover_url"],
-                       video_dict["video_url"]]]
-            time.sleep(1)
-            Feishu.update_values(log_type, crawler, "070a67", "F2:V2", values)
-            Common.logger(log_type, crawler).info(f"视频已保存至飞书文档\n")
+        # 视频信息保存至飞书
+        Feishu.insert_columns(log_type, crawler, "070a67", "ROWS", 1, 2)
+        # 视频ID工作表,首行写入数据
+        upload_time = int(time.time())
+        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
+                   "推荐榜爬虫策略",
+                   video_dict["video_title"],
+                   video_dict["video_id"],
+                   video_dict["duration"],
+                   f'{video_dict["video_width"]}*{video_dict["video_height"]}',
+                   our_video_link,
+                   video_dict["cover_url"],
+                   video_dict["video_url"]]]
+        time.sleep(1)
+        Feishu.update_values(log_type, crawler, "070a67", "F2:V2", values)
+        Common.logger(log_type, crawler).info(f"视频已保存至飞书文档\n")
 
-            rule_dict = {}
-            # 视频信息保存数据库
-            insert_sql = f""" insert into crawler_video(video_id,
-                                                            out_user_id,
-                                                            platform,
-                                                            strategy,
-                                                            out_video_id,
-                                                            video_title,
-                                                            cover_url,
-                                                            video_url,
-                                                            duration,
-                                                            publish_time,
-                                                            play_cnt,
-                                                            crawler_rule,
-                                                            width,
-                                                            height)
-                                                            values({our_video_id},
-                                                            "{video_dict['user_id']}",
-                                                            "{cls.platform}",
-                                                            "推荐榜爬虫策略",
-                                                            "{video_dict['video_id']}",
-                                                            "{video_dict['video_title']}",
-                                                            "{video_dict['cover_url']}",
-                                                            "{video_dict['video_url']}",
-                                                            {int(video_dict['duration'])},
-                                                            "{video_dict['publish_time_str']}",
-                                                            {int(video_dict['play_cnt'])},
-                                                            '{json.dumps(rule_dict)}',
-                                                            {int(video_dict['video_width'])},
-                                                            {int(video_dict['video_height'])}) """
-            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-            MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
-            Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
-            driver.press_keycode(AndroidKey.BACK)
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'download_publish异常:{e}\n')
+        rule_dict = {}
+        # 视频信息保存数据库
+        insert_sql = f""" insert into crawler_video(video_id,
+                                                        out_user_id,
+                                                        platform,
+                                                        strategy,
+                                                        out_video_id,
+                                                        video_title,
+                                                        cover_url,
+                                                        video_url,
+                                                        duration,
+                                                        publish_time,
+                                                        play_cnt,
+                                                        crawler_rule,
+                                                        width,
+                                                        height)
+                                                        values({our_video_id},
+                                                        "{video_dict['user_id']}",
+                                                        "{cls.platform}",
+                                                        "推荐榜爬虫策略",
+                                                        "{video_dict['video_id']}",
+                                                        "{video_dict['video_title']}",
+                                                        "{video_dict['cover_url']}",
+                                                        "{video_dict['video_url']}",
+                                                        {int(video_dict['duration'])},
+                                                        "{video_dict['publish_time_str']}",
+                                                        {int(video_dict['play_cnt'])},
+                                                        '{json.dumps(rule_dict)}',
+                                                        {int(video_dict['video_width'])},
+                                                        {int(video_dict['video_height'])}) """
+        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
+        Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
+        driver.press_keycode(AndroidKey.BACK)
 
 
 if __name__ == '__main__':
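
Two things change in `get_videoList` above: the loop body is wrapped in try/except (resetting `cls.i` on failure instead of aborting the run), and the title/cover lookups switch from `cls.i - 1` to `index + i`, i.e. the absolute position in the full page list, since the `//wx-view` XPath matches every element on the page rather than only the children of `video_element`. A minimal sketch of that offset pattern, with plain Python lists standing in for the WebDriver element lists; `process_page` and `handle` are illustrative names.

```python
def process_page(titles, covers, index, handle):
    """titles/covers are full-page lists; index counts items already handled."""
    new_items = titles[index:]
    for i, _ in enumerate(new_items):
        # index + i is the absolute position in the full page list,
        # mirroring the index + i lookups in get_videoList above.
        handle(titles[index + i], covers[index + i])
    return index + len(new_items)  # new offset for the next pass

# Usage: the second call only handles the newly appended third item.
offset = process_page(["t1", "t2"], ["c1", "c2"], 0, print)
offset = process_page(["t1", "t2", "t3"], ["c1", "c2", "c3"], offset, print)
```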

BIN  ganggangdouchuan/videos/.DS_Store


+ 406 - 406  gongzhonghao/gongzhonghao_follow/gongzhonghao_follow.py

@@ -65,299 +65,299 @@ class GongzhonghaoFollow:
     @classmethod
     def get_token(cls, log_type, crawler):
         while True:
-            try:
-                sheet = Feishu.get_values_batch(log_type, "gongzhonghao", "OjyJqs")
-                if sheet is None:
-                    time.sleep(1)
-                    continue
-                token = sheet[0][1]
-                cookie = sheet[1][1]
-                gzh_name = sheet[2][1]
-                gzh_time = sheet[3][1]
-                token_dict = {'token': token, 'cookie': cookie, 'gzh_name': gzh_name, 'gzh_time': gzh_time}
-                print(type(token_dict['gzh_time']))
-                print(token_dict['gzh_time'])
-                return token_dict
-            except Exception as e:
-                Common.logger(log_type, crawler).error(f"get_cookie_token异常:{e}\n")
+            # try:
+            sheet = Feishu.get_values_batch(log_type, crawler, "OjyJqs")
+            if sheet is None:
+                time.sleep(1)
+                continue
+            token = sheet[0][1]
+            cookie = sheet[1][1]
+            gzh_name = sheet[2][1]
+            gzh_time = sheet[3][1]
+            token_dict = {'token': token, 'cookie': cookie, 'gzh_name': gzh_name, 'gzh_time': gzh_time}
+            print(type(token_dict['gzh_time']))
+            print(token_dict['gzh_time'])
+            return token_dict
+            # except Exception as e:
+            #     Common.logger(log_type, crawler).error(f"get_cookie_token异常:{e}\n")
 
     # 获取用户 fakeid
     @classmethod
     def get_fakeid(cls, log_type, crawler, user, index):
-        try:
-            while True:
-                token_dict = cls.get_token(log_type, crawler)
-                url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
-                headers = {
-                    "accept": "*/*",
-                    "accept-encoding": "gzip, deflate, br",
-                    "accept-language": "zh-CN,zh;q=0.9",
-                    "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
-                               "t=media/appmsg_edit_v2&action=edit&isNew=1"
-                               "&type=77&createType=5&token=1011071554&lang=zh_CN",
-                    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
-                    "sec-ch-ua-mobile": "?0",
-                    "sec-ch-ua-platform": '"Windows"',
-                    "sec-fetch-dest": "empty",
-                    "sec-fetch-mode": "cors",
-                    "sec-fetch-site": "same-origin",
-                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
-                                  " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
-                    "x-requested-with": "XMLHttpRequest",
-                    'cookie': token_dict['cookie'],
-                }
-                params = {
-                    "action": "search_biz",
-                    "begin": "0",
-                    "count": "5",
-                    "query": str(user),
-                    "token": token_dict['token'],
-                    "lang": "zh_CN",
-                    "f": "json",
-                    "ajax": "1",
-                }
-                urllib3.disable_warnings()
-                # s = requests.session()
-                # # max_retries=3 重试3次
-                # s.mount('http://', HTTPAdapter(max_retries=3))
-                # s.mount('https://', HTTPAdapter(max_retries=3))
-                # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
-                r = requests.get(url=url, headers=headers, params=params, verify=False)
-                r.close()
-                if r.json()["base_resp"]["err_msg"] == "invalid session":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(f"token_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"token_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if r.json()["base_resp"]["err_msg"] == "freq control":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(f"公众号_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if "list" not in r.json() or len(r.json()["list"]) == 0:
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(f"公众号_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-
-                fakeid = r.json()["list"][int(index) - 1]["fakeid"]
-                head_url = r.json()["list"][int(index) - 1]["round_head_img"]
-                fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
-                return fakeid_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_fakeid异常:{e}\n")
+        # try:
+        while True:
+            token_dict = cls.get_token(log_type, crawler)
+            url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
+            headers = {
+                "accept": "*/*",
+                "accept-encoding": "gzip, deflate, br",
+                "accept-language": "zh-CN,zh;q=0.9",
+                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
+                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
+                           "&type=77&createType=5&token=1011071554&lang=zh_CN",
+                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
+                "sec-ch-ua-mobile": "?0",
+                "sec-ch-ua-platform": '"Windows"',
+                "sec-fetch-dest": "empty",
+                "sec-fetch-mode": "cors",
+                "sec-fetch-site": "same-origin",
+                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
+                "x-requested-with": "XMLHttpRequest",
+                'cookie': token_dict['cookie'],
+            }
+            params = {
+                "action": "search_biz",
+                "begin": "0",
+                "count": "5",
+                "query": str(user),
+                "token": token_dict['token'],
+                "lang": "zh_CN",
+                "f": "json",
+                "ajax": "1",
+            }
+            urllib3.disable_warnings()
+            # s = requests.session()
+            # # max_retries=3 重试3次
+            # s.mount('http://', HTTPAdapter(max_retries=3))
+            # s.mount('https://', HTTPAdapter(max_retries=3))
+            # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
+            r = requests.get(url=url, headers=headers, params=params, verify=False)
+            r.close()
+            if r.json()["base_resp"]["err_msg"] == "invalid session":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"token_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"token_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if r.json()["base_resp"]["err_msg"] == "freq control":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"公众号_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if "list" not in r.json() or len(r.json()["list"]) == 0:
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"公众号_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+
+            fakeid = r.json()["list"][int(index) - 1]["fakeid"]
+            head_url = r.json()["list"][int(index) - 1]["round_head_img"]
+            fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
+            return fakeid_dict
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"get_fakeid异常:{e}\n")
 
     # 获取腾讯视频下载链接
     @classmethod
-    def get_tencent_video_url(cls, log_type, crawler, video_id):
-        try:
-            url = 'https://vv.video.qq.com/getinfo?vids=' + str(video_id) + '&platform=101001&charge=0&otype=json'
-            response = requests.get(url=url).text.replace('QZOutputJson=', '').replace('"};', '"}')
-            response = json.loads(response)
-            url = response['vl']['vi'][0]['ul']['ui'][0]['url']
-            fvkey = response['vl']['vi'][0]['fvkey']
-            video_url = url + str(video_id) + '.mp4?vkey=' + fvkey
-            return video_url
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_tencent_video_url异常:{e}\n")
+    def get_tencent_video_url(cls, video_id):
+        # try:
+        url = 'https://vv.video.qq.com/getinfo?vids=' + str(video_id) + '&platform=101001&charge=0&otype=json'
+        response = requests.get(url=url).text.replace('QZOutputJson=', '').replace('"};', '"}')
+        response = json.loads(response)
+        url = response['vl']['vi'][0]['ul']['ui'][0]['url']
+        fvkey = response['vl']['vi'][0]['fvkey']
+        video_url = url + str(video_id) + '.mp4?vkey=' + fvkey
+        return video_url
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"get_tencent_video_url异常:{e}\n")
 
     @classmethod
-    def get_video_url(cls, log_type, crawler, article_url, env):
-        try:
-            # 打印请求配置
-            ca = DesiredCapabilities.CHROME
-            ca["goog:loggingPrefs"] = {"performance": "ALL"}
-
-            # 不打开浏览器运行
-            chrome_options = webdriver.ChromeOptions()
-            chrome_options.add_argument("headless")
-            chrome_options.add_argument(
-                f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-            chrome_options.add_argument("--no-sandbox")
-
-            # driver初始化
-            if env == "prod":
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
-            else:
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
-                    '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
-
-            driver.implicitly_wait(10)
-            # Common.logger(log_type, crawler).info('打开文章链接')
-            driver.get(article_url)
-            time.sleep(1)
-
-            if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
-                video_url = driver.find_element(
-                    By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]').get_attribute('src')
-            elif len(driver.find_elements(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]')) != 0:
-                iframe = driver.find_element(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]').get_attribute(
-                    'src')
-                video_id = iframe.split('vid=')[-1].split('&')[0]
-                video_url = cls.get_tencent_video_url(log_type, crawler, video_id)
-            else:
-                video_url = 0
-            driver.quit()
-            return video_url
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f'get_video_url异常:{e}\n')
+    def get_video_url(cls, article_url, env):
+        # try:
+        # 打印请求配置
+        ca = DesiredCapabilities.CHROME
+        ca["goog:loggingPrefs"] = {"performance": "ALL"}
+
+        # 不打开浏览器运行
+        chrome_options = webdriver.ChromeOptions()
+        chrome_options.add_argument("headless")
+        chrome_options.add_argument(
+            f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+        chrome_options.add_argument("--no-sandbox")
+
+        # driver初始化
+        if env == "prod":
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
+        else:
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
+                '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
+
+        driver.implicitly_wait(10)
+        # Common.logger(log_type, crawler).info('打开文章链接')
+        driver.get(article_url)
+        time.sleep(1)
+
+        if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
+            video_url = driver.find_element(
+                By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]').get_attribute('src')
+        elif len(driver.find_elements(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]')) != 0:
+            iframe = driver.find_element(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]').get_attribute(
+                'src')
+            video_id = iframe.split('vid=')[-1].split('&')[0]
+            video_url = cls.get_tencent_video_url(video_id)
+        else:
+            video_url = 0
+        driver.quit()
+        return video_url
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).info(f'get_video_url异常:{e}\n')
 
     # 获取文章列表
     @classmethod
     def get_videoList(cls, log_type, crawler, user, index, oss_endpoint, env):
-        try:
-            while True:
-                token_dict = cls.get_token(log_type, crawler)
-                fakeid_dict = cls.get_fakeid(log_type, crawler, user, index)
-                url = "https://mp.weixin.qq.com/cgi-bin/appmsg?"
-                headers = {
-                    "accept": "*/*",
-                    "accept-encoding": "gzip, deflate, br",
-                    "accept-language": "zh-CN,zh;q=0.9",
-                    "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
-                               "t=media/appmsg_edit_v2&action=edit&isNew=1"
-                               "&type=77&createType=5&token=" + str(token_dict['token']) + "&lang=zh_CN",
-                    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
-                    "sec-ch-ua-mobile": "?0",
-                    "sec-ch-ua-platform": '"Windows"',
-                    "sec-fetch-dest": "empty",
-                    "sec-fetch-mode": "cors",
-                    "sec-fetch-site": "same-origin",
-                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
-                                  " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
-                    "x-requested-with": "XMLHttpRequest",
-                    'cookie': token_dict['cookie'],
-                }
-                params = {
-                    "action": "list_ex",
-                    "begin": str(cls.begin),
-                    "count": "5",
-                    "fakeid": fakeid_dict['fakeid'],
-                    "type": "9",
-                    "query": "",
-                    "token": str(token_dict['token']),
-                    "lang": "zh_CN",
-                    "f": "json",
-                    "ajax": "1",
-                }
-                urllib3.disable_warnings()
-                # s = requests.session()
-                # # max_retries=3 重试3次
-                # s.mount('http://', HTTPAdapter(max_retries=3))
-                # s.mount('https://', HTTPAdapter(max_retries=3))
-                # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
-                r = requests.get(url=url, headers=headers, params=params, verify=False)
-                r.close()
-                if r.json()["base_resp"]["err_msg"] == "invalid session":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"token_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"token_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if r.json()["base_resp"]["err_msg"] == "freq control":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler,f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if 'app_msg_list' not in r.json():
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if len(r.json()['app_msg_list']) == 0:
-                    Common.logger(log_type, crawler).info('没有更多视频了\n')
-                    return
-                else:
-                    cls.begin += 5
-                    app_msg_list = r.json()['app_msg_list']
-                    for article_url in app_msg_list:
-                        # title
-                        if 'title' in article_url:
-                            title = article_url['title'].replace('/', '').replace('\n', '') \
-                                .replace('.', '').replace('“', '').replace('”', '').replace(' ', '')\
-                                .replace('"', '').replace("'", "")
-                        else:
-                            title = 0
-
-                        # aid
-                        if 'aid' in article_url:
-                            aid = article_url['aid']
-                        else:
-                            aid = 0
-
-                        # create_time
-                        if 'create_time' in article_url:
-                            create_time = article_url['create_time']
-                        else:
-                            create_time = 0
-                        publish_time_stamp = int(create_time)
-                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-
-                        avatar_url = fakeid_dict['head_url']
-
-                        # cover_url
-                        if 'cover' in article_url:
-                            cover_url = article_url['cover']
-                        else:
-                            cover_url = 0
-
-                        # article_url
-                        if 'link' in article_url:
-                            article_url = article_url['link']
-                        else:
-                            article_url = 0
-
-                        video_url = cls.get_video_url(log_type, crawler, article_url, env)
-
-                        video_dict = {
-                            'video_id': aid,
-                            'video_title': title,
-                            'publish_time_stamp': publish_time_stamp,
-                            'publish_time_str': publish_time_str,
-                            'user_name': user,
-                            'play_cnt': 0,
-                            'comment_cnt': 0,
-                            'like_cnt': 0,
-                            'share_cnt': 0,
-                            'user_id': fakeid_dict['fakeid'],
-                            'avatar_url': avatar_url,
-                            'cover_url': cover_url,
-                            'article_url': article_url,
-                            'video_url': video_url,
-                            'session': f'gongzhonghao-follow-{int(time.time())}'
-                        }
-                        for k, v in video_dict.items():
-                            Common.logger(log_type, crawler).info(f"{k}:{v}")
-                        if int(time.time()) - publish_time_stamp > 3600 * 24 * 3:
-                            Common.logger(log_type, crawler).info(f'发布时间{publish_time_str} > 3 天\n')
-                            cls.begin = 0
-                            return
-                        cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
-
-                    Common.logger(log_type, crawler).info('休眠 60 秒\n')
-                    time.sleep(60)
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_videoList异常:{e}\n")
+        # try:
+        while True:
+            token_dict = cls.get_token(log_type, crawler)
+            fakeid_dict = cls.get_fakeid(log_type, crawler, user, index)
+            url = "https://mp.weixin.qq.com/cgi-bin/appmsg?"
+            headers = {
+                "accept": "*/*",
+                "accept-encoding": "gzip, deflate, br",
+                "accept-language": "zh-CN,zh;q=0.9",
+                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
+                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
+                           "&type=77&createType=5&token=" + str(token_dict['token']) + "&lang=zh_CN",
+                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
+                "sec-ch-ua-mobile": "?0",
+                "sec-ch-ua-platform": '"Windows"',
+                "sec-fetch-dest": "empty",
+                "sec-fetch-mode": "cors",
+                "sec-fetch-site": "same-origin",
+                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
+                "x-requested-with": "XMLHttpRequest",
+                'cookie': token_dict['cookie'],
+            }
+            params = {
+                "action": "list_ex",
+                "begin": str(cls.begin),
+                "count": "5",
+                "fakeid": fakeid_dict['fakeid'],
+                "type": "9",
+                "query": "",
+                "token": str(token_dict['token']),
+                "lang": "zh_CN",
+                "f": "json",
+                "ajax": "1",
+            }
+            urllib3.disable_warnings()
+            # s = requests.session()
+            # # max_retries=3 重试3次
+            # s.mount('http://', HTTPAdapter(max_retries=3))
+            # s.mount('https://', HTTPAdapter(max_retries=3))
+            # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
+            r = requests.get(url=url, headers=headers, params=params, verify=False)
+            r.close()
+            if r.json()["base_resp"]["err_msg"] == "invalid session":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"token_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"token_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if r.json()["base_resp"]["err_msg"] == "freq control":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler,f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if 'app_msg_list' not in r.json():
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_1:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if len(r.json()['app_msg_list']) == 0:
+                Common.logger(log_type, crawler).info('没有更多视频了\n')
+                return
+            else:
+                cls.begin += 5
+                app_msg_list = r.json()['app_msg_list']
+                for article_url in app_msg_list:
+                    # title
+                    if 'title' in article_url:
+                        title = article_url['title'].replace('/', '').replace('\n', '') \
+                            .replace('.', '').replace('“', '').replace('”', '').replace(' ', '')\
+                            .replace('"', '').replace("'", "")
+                    else:
+                        title = 0
+
+                    # aid
+                    if 'aid' in article_url:
+                        aid = article_url['aid']
+                    else:
+                        aid = 0
+
+                    # create_time
+                    if 'create_time' in article_url:
+                        create_time = article_url['create_time']
+                    else:
+                        create_time = 0
+                    publish_time_stamp = int(create_time)
+                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
+
+                    avatar_url = fakeid_dict['head_url']
+
+                    # cover_url
+                    if 'cover' in article_url:
+                        cover_url = article_url['cover']
+                    else:
+                        cover_url = 0
+
+                    # article_url
+                    if 'link' in article_url:
+                        article_url = article_url['link']
+                    else:
+                        article_url = 0
+
+                    video_url = cls.get_video_url(article_url, env)
+
+                    video_dict = {
+                        'video_id': aid,
+                        'video_title': title,
+                        'publish_time_stamp': publish_time_stamp,
+                        'publish_time_str': publish_time_str,
+                        'user_name': user,
+                        'play_cnt': 0,
+                        'comment_cnt': 0,
+                        'like_cnt': 0,
+                        'share_cnt': 0,
+                        'user_id': fakeid_dict['fakeid'],
+                        'avatar_url': avatar_url,
+                        'cover_url': cover_url,
+                        'article_url': article_url,
+                        'video_url': video_url,
+                        'session': f'gongzhonghao-follow-{int(time.time())}'
+                    }
+                    for k, v in video_dict.items():
+                        Common.logger(log_type, crawler).info(f"{k}:{v}")
+                    if int(time.time()) - publish_time_stamp > 3600 * 24 * 3:
+                        Common.logger(log_type, crawler).info(f'发布时间{publish_time_str} > 3 天\n')
+                        cls.begin = 0
+                        return
+                    cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
+
+                Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                time.sleep(60)
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"get_videoList异常:{e}\n")
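
The paging logic above advances `cls.begin` by 5 per request and stops as soon as it meets an article published more than 3 days ago. A minimal sketch of that loop shape, with hypothetical `fetch_page` / `handle_item` stand-ins for the HTTP request and the `download_publish` call:

```python
import time

THREE_DAYS = 3600 * 24 * 3

def walk_pages(fetch_page, handle_item):
    """Hypothetical helper mirroring the loop above: fetch_page(begin) returns
    a list of items carrying 'create_time'; stop once an item is older than 3 days."""
    begin = 0
    while True:
        items = fetch_page(begin)
        if not items:
            return                      # 没有更多视频了
        begin += 5                      # same paging step as cls.begin += 5
        for item in items:
            if int(time.time()) - int(item["create_time"]) > THREE_DAYS:
                return                  # mirrors the cls.begin = 0 + return above
            handle_item(item)
        time.sleep(60)                  # same 60-second pause between pages
```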
 
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, env):
@@ -368,133 +368,133 @@ class GongzhonghaoFollow:
     # 下载/上传
     @classmethod
     def download_publish(cls, log_type, crawler, video_dict, oss_endpoint, env):
-        try:
-            if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
-                Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
-            # 标题敏感词过滤
-            elif any(word if word in video_dict['video_title'] else False for word in
-                     filter_word(log_type, crawler, "公众号", env)) is True:
-                Common.logger(log_type, crawler).info("标题已中过滤词\n")
-            # 已下载判断
-            elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
-                Common.logger(log_type, crawler).info("视频已下载\n")
-            # 标题相似度
-            elif cls.title_like(log_type, crawler, video_dict['video_title'], env) is True:
-                Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+        # try:
+        if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
+            Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
+        # 标题敏感词过滤
+        elif any(word in video_dict['video_title'] for word in
+                 filter_word(log_type, crawler, "公众号", env)):
+            Common.logger(log_type, crawler).info("标题已中过滤词\n")
+        # 已下载判断
+        elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
+            Common.logger(log_type, crawler).info("视频已下载\n")
+        # 标题相似度
+        elif cls.title_like(log_type, crawler, video_dict['video_title'], env) is True:
+            Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+        else:
+            # 下载视频
+            Common.download_method(log_type=log_type, crawler=crawler, text="video",
+                                   title=video_dict["video_title"], url=video_dict["video_url"])
+            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
+            # 获取视频时长
+            ffmpeg_dict = Common.ffmpeg(log_type, crawler,
+                                        f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
+            if ffmpeg_dict is None:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{md_title}")
+                Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                return
+            video_dict["video_width"] = ffmpeg_dict["width"]
+            video_dict["video_height"] = ffmpeg_dict["height"]
+            video_dict["duration"] = ffmpeg_dict["duration"]
+            video_size = ffmpeg_dict["size"]
+            Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
+            Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
+            Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
+            Common.logger(log_type, crawler).info(f'video_size:{video_size}')
+            # 视频size=0 或不满足抓取规则,直接删除
+            if int(video_size) == 0 or cls.download_rule(video_dict) is False:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{md_title}")
+                Common.logger(log_type, crawler).info("视频size=0或不满足抓取规则,删除成功\n")
+                return
+            # 下载封面
+            Common.download_method(log_type=log_type, crawler=crawler, text="cover",
+                                   title=video_dict["video_title"], url=video_dict["cover_url"])
+            # 保存视频信息至 "./videos/{video_title}/info.txt"
+            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
+
+            # 上传视频
+            Common.logger(log_type, crawler).info("开始上传视频...")
+            strategy = "定向爬虫策略"
+            our_video_id = Publish.upload_and_publish(log_type=log_type,
+                                                      crawler=crawler,
+                                                      strategy=strategy,
+                                                      our_uid="follow",
+                                                      oss_endpoint=oss_endpoint,
+                                                      env=env)
+            if env == 'prod':
+                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
             else:
-                # 下载视频
-                Common.download_method(log_type=log_type, crawler=crawler, text="video",
-                                       title=video_dict["video_title"], url=video_dict["video_url"])
-                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-                # 获取视频时长
-                ffmpeg_dict = Common.ffmpeg(log_type, crawler,
-                                            f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
-                if ffmpeg_dict is None:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                    Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-                    return
-                video_dict["video_width"] = ffmpeg_dict["width"]
-                video_dict["video_height"] = ffmpeg_dict["height"]
-                video_dict["duration"] = ffmpeg_dict["duration"]
-                video_size = ffmpeg_dict["size"]
-                Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
-                Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
-                Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
-                Common.logger(log_type, crawler).info(f'video_size:{video_size}')
-                # 视频size=0,直接删除
-                if int(video_size) == 0 or cls.download_rule(video_dict) is False:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                    Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-                    return
-                # 下载封面
-                Common.download_method(log_type=log_type, crawler=crawler, text="cover",
-                                       title=video_dict["video_title"], url=video_dict["cover_url"])
-                # 保存视频信息至 "./videos/{video_title}/info.txt"
-                Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-                # 上传视频
-                Common.logger(log_type, crawler).info("开始上传视频...")
-                strategy = "定向爬虫策略"
-                our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                          crawler=crawler,
-                                                          strategy=strategy,
-                                                          our_uid="follow",
-                                                          oss_endpoint=oss_endpoint,
-                                                          env=env)
-                if env == 'prod':
-                    our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
-                else:
-                    our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
-                Common.logger(log_type, crawler).info("视频上传完成")
-
-                if our_video_id is None:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-                    return
-
-                # 视频信息保存数据库
-                rule_dict = {
-                    "duration": {"min": 20, "max": 45 * 60},
-                    "publish_day": {"min": 3}
-                }
-
-                insert_sql = f""" insert into crawler_video(video_id,
-                                                            out_user_id,
-                                                            platform,
-                                                            strategy,
-                                                            out_video_id,
-                                                            video_title,
-                                                            cover_url,
-                                                            video_url,
-                                                            duration,
-                                                            publish_time,
-                                                            play_cnt,
-                                                            crawler_rule,
-                                                            width,
-                                                            height)
-                                                            values({our_video_id},
-                                                            "{video_dict['user_id']}",
-                                                            "{cls.platform}",
-                                                            "定向爬虫策略",
-                                                            "{video_dict['video_id']}",
-                                                            "{video_dict['video_title']}",
-                                                            "{video_dict['cover_url']}",
-                                                            "{video_dict['video_url']}",
-                                                            {int(video_dict['duration'])},
-                                                            "{video_dict['publish_time_str']}",
-                                                            {int(video_dict['play_cnt'])},
-                                                            '{json.dumps(rule_dict)}',
-                                                            {int(video_dict['video_width'])},
-                                                            {int(video_dict['video_height'])}) """
-                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-                MysqlHelper.update_values(log_type, crawler, insert_sql, env)
-                Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
-
-                # 视频写入飞书
-                Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
-                # 视频ID工作表,首行写入数据
-                upload_time = int(time.time())
-                values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                           "用户主页",
-                           video_dict['video_title'],
-                           video_dict['video_id'],
-                           our_video_link,
-                           int(video_dict['duration']),
-                           f"{video_dict['video_width']}*{video_dict['video_height']}",
-                           video_dict['publish_time_str'],
-                           video_dict['user_name'],
-                           video_dict['user_id'],
-                           video_dict['avatar_url'],
-                           video_dict['cover_url'],
-                           video_dict['article_url'],
-                           video_dict['video_url']]]
-                time.sleep(0.5)
-                Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
-                Common.logger(log_type, crawler).info('视频下载/上传成功\n')
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"download_publish异常:{e}\n")
+                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
+            Common.logger(log_type, crawler).info("视频上传完成")
+
+            if our_video_id is None:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
+                return
+
+            # 视频信息保存数据库
+            rule_dict = {
+                "duration": {"min": 20, "max": 45 * 60},
+                "publish_day": {"min": 3}
+            }
+
+            insert_sql = f""" insert into crawler_video(video_id,
+                                                        out_user_id,
+                                                        platform,
+                                                        strategy,
+                                                        out_video_id,
+                                                        video_title,
+                                                        cover_url,
+                                                        video_url,
+                                                        duration,
+                                                        publish_time,
+                                                        play_cnt,
+                                                        crawler_rule,
+                                                        width,
+                                                        height)
+                                                        values({our_video_id},
+                                                        "{video_dict['user_id']}",
+                                                        "{cls.platform}",
+                                                        "定向爬虫策略",
+                                                        "{video_dict['video_id']}",
+                                                        "{video_dict['video_title']}",
+                                                        "{video_dict['cover_url']}",
+                                                        "{video_dict['video_url']}",
+                                                        {int(video_dict['duration'])},
+                                                        "{video_dict['publish_time_str']}",
+                                                        {int(video_dict['play_cnt'])},
+                                                        '{json.dumps(rule_dict)}',
+                                                        {int(video_dict['video_width'])},
+                                                        {int(video_dict['video_height'])}) """
+            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+            MysqlHelper.update_values(log_type, crawler, insert_sql, env)
+            Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
+
+            # 视频写入飞书
+            Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
+            # 视频ID工作表,首行写入数据
+            upload_time = int(time.time())
+            values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
+                       "用户主页",
+                       video_dict['video_title'],
+                       video_dict['video_id'],
+                       our_video_link,
+                       int(video_dict['duration']),
+                       f"{video_dict['video_width']}*{video_dict['video_height']}",
+                       video_dict['publish_time_str'],
+                       video_dict['user_name'],
+                       video_dict['user_id'],
+                       video_dict['avatar_url'],
+                       video_dict['cover_url'],
+                       video_dict['article_url'],
+                       video_dict['video_url']]]
+            time.sleep(0.5)
+            Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
+            Common.logger(log_type, crawler).info('视频下载/上传成功\n')
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"download_publish异常:{e}\n")
 
     @classmethod
     def get_users(cls):
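
The `insert_sql` above builds the statement with an f-string, so a title or URL containing a quote character would break the SQL. A parameterised variant is sketched below; it is an illustration only (a plain `pymysql` connection is assumed here, not the project's `MysqlHelper` API):

```python
import json
import pymysql  # assumption: a plain pymysql connection is available

def insert_video(conn, our_video_id, video_dict, platform, rule_dict):
    """Sketch of a parameterised insert; the column list matches the SQL above."""
    sql = """insert into crawler_video(video_id, out_user_id, platform, strategy,
                 out_video_id, video_title, cover_url, video_url, duration,
                 publish_time, play_cnt, crawler_rule, width, height)
             values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    args = (our_video_id, video_dict['user_id'], platform, "定向爬虫策略",
            video_dict['video_id'], video_dict['video_title'],
            video_dict['cover_url'], video_dict['video_url'],
            int(video_dict['duration']), video_dict['publish_time_str'],
            int(video_dict['play_cnt']), json.dumps(rule_dict),
            int(video_dict['video_width']), int(video_dict['video_height']))
    with conn.cursor() as cursor:
        cursor.execute(sql, args)   # the driver escapes every value
    conn.commit()
```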

+ 406 - 406
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_2.py

@@ -66,299 +66,299 @@ class GongzhonghaoFollow2:
     @classmethod
     def get_token(cls, log_type, crawler):
         while True:
-            try:
-                sheet = Feishu.get_values_batch(log_type, crawler, "I4aeh3")
-                if sheet is None:
-                    time.sleep(1)
-                    continue
-                token = sheet[0][1]
-                cookie = sheet[1][1]
-                gzh_name = sheet[2][1]
-                gzh_time = sheet[3][1]
-                token_dict = {'token': token, 'cookie': cookie, 'gzh_name': gzh_name, 'gzh_time': gzh_time}
-                return token_dict
-            except Exception as e:
-                Common.logger(log_type, crawler).error(f"get_cookie_token异常:{e}\n")
+            # try:
+            sheet = Feishu.get_values_batch(log_type, crawler, "I4aeh3")
+            if sheet is None:
+                time.sleep(1)
+                continue
+            token = sheet[0][1]
+            cookie = sheet[1][1]
+            gzh_name = sheet[2][1]
+            gzh_time = sheet[3][1]
+            token_dict = {'token': token, 'cookie': cookie, 'gzh_name': gzh_name, 'gzh_time': gzh_time}
+            return token_dict
+            # except Exception as e:
+            #     Common.logger(log_type, crawler).error(f"get_cookie_token异常:{e}\n")
 
     # 获取用户 fakeid
     @classmethod
     def get_fakeid(cls, log_type, crawler, user, index):
-        try:
-            while True:
-                token_dict = cls.get_token(log_type, crawler)
-                url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
-                headers = {
-                    "accept": "*/*",
-                    "accept-encoding": "gzip, deflate, br",
-                    "accept-language": "zh-CN,zh;q=0.9",
-                    "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
-                               "t=media/appmsg_edit_v2&action=edit&isNew=1"
-                               "&type=77&createType=5&token=1011071554&lang=zh_CN",
-                    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
-                    "sec-ch-ua-mobile": "?0",
-                    "sec-ch-ua-platform": '"Windows"',
-                    "sec-fetch-dest": "empty",
-                    "sec-fetch-mode": "cors",
-                    "sec-fetch-site": "same-origin",
-                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
-                                  " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
-                    "x-requested-with": "XMLHttpRequest",
-                    'cookie': token_dict['cookie'],
-                }
-                params = {
-                    "action": "search_biz",
-                    "begin": "0",
-                    "count": "5",
-                    "query": str(user),
-                    "token": token_dict['token'],
-                    "lang": "zh_CN",
-                    "f": "json",
-                    "ajax": "1",
-                }
-                urllib3.disable_warnings()
-                # s = requests.session()
-                # # max_retries=3 重试3次
-                # s.mount('http://', HTTPAdapter(max_retries=3))
-                # s.mount('https://', HTTPAdapter(max_retries=3))
-                # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
-                r = requests.get(url=url, headers=headers, params=params, verify=False)
-                r.close()
-                if r.json()["base_resp"]["err_msg"] == "invalid session":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if r.json()["base_resp"]["err_msg"] == "freq control":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if "list" not in r.json() or len(r.json()["list"]) == 0:
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                fakeid = r.json()["list"][int(index) - 1]["fakeid"]
-                head_url = r.json()["list"][int(index) - 1]["round_head_img"]
-                fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
-                return fakeid_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_fakeid异常:{e}\n")
+        # try:
+        while True:
+            token_dict = cls.get_token(log_type, crawler)
+            url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
+            headers = {
+                "accept": "*/*",
+                "accept-encoding": "gzip, deflate, br",
+                "accept-language": "zh-CN,zh;q=0.9",
+                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
+                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
+                           "&type=77&createType=5&token=1011071554&lang=zh_CN",
+                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
+                "sec-ch-ua-mobile": "?0",
+                "sec-ch-ua-platform": '"Windows"',
+                "sec-fetch-dest": "empty",
+                "sec-fetch-mode": "cors",
+                "sec-fetch-site": "same-origin",
+                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
+                "x-requested-with": "XMLHttpRequest",
+                'cookie': token_dict['cookie'],
+            }
+            params = {
+                "action": "search_biz",
+                "begin": "0",
+                "count": "5",
+                "query": str(user),
+                "token": token_dict['token'],
+                "lang": "zh_CN",
+                "f": "json",
+                "ajax": "1",
+            }
+            urllib3.disable_warnings()
+            # s = requests.session()
+            # # max_retries=3 重试3次
+            # s.mount('http://', HTTPAdapter(max_retries=3))
+            # s.mount('https://', HTTPAdapter(max_retries=3))
+            # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
+            r = requests.get(url=url, headers=headers, params=params, verify=False)
+            r.close()
+            if r.json()["base_resp"]["err_msg"] == "invalid session":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if r.json()["base_resp"]["err_msg"] == "freq control":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if "list" not in r.json() or len(r.json()["list"]) == 0:
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            fakeid = r.json()["list"][int(index) - 1]["fakeid"]
+            head_url = r.json()["list"][int(index) - 1]["round_head_img"]
+            fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
+            return fakeid_dict
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"get_fakeid异常:{e}\n")
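
The `invalid session` / `freq control` branches all share the same recovery pattern: log a warning, ping Feishu only between 10:00 and 20:00, sleep ten minutes, then retry with a fresh token. A small sketch of that shared back-off, with `notify` standing in for `Feishu.bot`:

```python
import datetime
import time

def backoff_and_notify(notify, message, sleep_seconds=60 * 10):
    """Sketch of the shared handling for expired or rate-limited tokens:
    alert only during working hours (10:00-20:00), then wait before retrying."""
    if 10 <= datetime.datetime.now().hour <= 20:
        notify(message)
    time.sleep(sleep_seconds)

# hypothetical usage inside the polling loop:
# if r.json()["base_resp"]["err_msg"] in ("invalid session", "freq control"):
#     backoff_and_notify(lambda msg: Feishu.bot(log_type, crawler, msg),
#                        "token 失效或频控,请扫码更换\nhttps://mp.weixin.qq.com/")
#     continue
```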
 
     # 获取腾讯视频下载链接
     @classmethod
-    def get_tencent_video_url(cls, log_type, crawler, video_id):
-        try:
-            url = 'https://vv.video.qq.com/getinfo?vids=' + str(video_id) + '&platform=101001&charge=0&otype=json'
-            response = requests.get(url=url).text.replace('QZOutputJson=', '').replace('"};', '"}')
-            response = json.loads(response)
-            url = response['vl']['vi'][0]['ul']['ui'][0]['url']
-            fvkey = response['vl']['vi'][0]['fvkey']
-            video_url = url + str(video_id) + '.mp4?vkey=' + fvkey
-            return video_url
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_tencent_video_url异常:{e}\n")
+    def get_tencent_video_url(cls, video_id):
+        # try:
+        url = 'https://vv.video.qq.com/getinfo?vids=' + str(video_id) + '&platform=101001&charge=0&otype=json'
+        response = requests.get(url=url).text.replace('QZOutputJson=', '').replace('"};', '"}')
+        response = json.loads(response)
+        url = response['vl']['vi'][0]['ul']['ui'][0]['url']
+        fvkey = response['vl']['vi'][0]['fvkey']
+        video_url = url + str(video_id) + '.mp4?vkey=' + fvkey
+        return video_url
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"get_tencent_video_url异常:{e}\n")
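
`get_tencent_video_url` strips the `QZOutputJson=` JSONP wrapper and indexes straight into `vl.vi[0]` with no timeout or error handling. A more defensive variant, shown purely as an illustration of the same lookup:

```python
import json
import requests

def tencent_play_url(video_id, timeout=10):
    """Sketch: resolve a Tencent Video vid to a direct mp4 URL,
    returning None on any unexpected response instead of raising."""
    url = (f"https://vv.video.qq.com/getinfo?vids={video_id}"
           f"&platform=101001&charge=0&otype=json")
    try:
        body = requests.get(url, timeout=timeout).text.strip()
        body = body.replace('QZOutputJson=', '', 1).rstrip(';')
        vi = json.loads(body)['vl']['vi'][0]
        return vi['ul']['ui'][0]['url'] + str(video_id) + '.mp4?vkey=' + vi['fvkey']
    except (requests.RequestException, KeyError, IndexError, ValueError):
        return None
```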
 
     @classmethod
-    def get_video_url(cls, log_type, crawler, article_url, env):
-        try:
-            # 打印请求配置
-            ca = DesiredCapabilities.CHROME
-            ca["goog:loggingPrefs"] = {"performance": "ALL"}
-
-            # 不打开浏览器运行
-            chrome_options = webdriver.ChromeOptions()
-            chrome_options.add_argument("headless")
-            chrome_options.add_argument(
-                f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-            chrome_options.add_argument("--no-sandbox")
-
-            # driver初始化
-            if env == "prod":
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
-            else:
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
-                    '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
-
-            driver.implicitly_wait(10)
-            # Common.logger(log_type, crawler).info('打开文章链接')
-            driver.get(article_url)
-            time.sleep(1)
-
-            if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
-                video_url = driver.find_element(
-                    By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]').get_attribute('src')
-            elif len(driver.find_elements(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]')) != 0:
-                iframe = driver.find_element(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]').get_attribute(
-                    'src')
-                video_id = iframe.split('vid=')[-1].split('&')[0]
-                video_url = cls.get_tencent_video_url(log_type, crawler, video_id)
-            else:
-                video_url = 0
-            driver.quit()
-            return video_url
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f'get_video_url异常:{e}\n')
+    def get_video_url(cls, article_url, env):
+        # try:
+        # 打印请求配置
+        ca = DesiredCapabilities.CHROME
+        ca["goog:loggingPrefs"] = {"performance": "ALL"}
+
+        # 不打开浏览器运行
+        chrome_options = webdriver.ChromeOptions()
+        chrome_options.add_argument("headless")
+        chrome_options.add_argument(
+            f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+        chrome_options.add_argument("--no-sandbox")
+
+        # driver初始化
+        if env == "prod":
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
+        else:
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
+                '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
+
+        driver.implicitly_wait(10)
+        # Common.logger(log_type, crawler).info('打开文章链接')
+        driver.get(article_url)
+        time.sleep(1)
+
+        if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
+            video_url = driver.find_element(
+                By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]').get_attribute('src')
+        elif len(driver.find_elements(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]')) != 0:
+            iframe = driver.find_element(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]').get_attribute(
+                'src')
+            video_id = iframe.split('vid=')[-1].split('&')[0]
+            video_url = cls.get_tencent_video_url(video_id)
+        else:
+            video_url = 0
+        driver.quit()
+        return video_url
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).info(f'get_video_url异常:{e}\n')
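
Here the Tencent `vid` is recovered from the iframe `src` by splitting on `vid=`. An equivalent sketch using `urllib.parse`, which also copes with a reordered query string:

```python
from urllib.parse import urlparse, parse_qs

def vid_from_iframe_src(iframe_src):
    """Sketch: extract the Tencent 'vid' query parameter from an iframe URL."""
    values = parse_qs(urlparse(iframe_src).query).get('vid', [])
    return values[0] if values else None

# vid_from_iframe_src("https://v.qq.com/txp/iframe/player.html?vid=abc123&width=500")
# -> "abc123"
```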
 
     # 获取文章列表
     @classmethod
     def get_videoList(cls, log_type, crawler, user, index, oss_endpoint, env):
-        try:
-            while True:
-                fakeid_dict = cls.get_fakeid(log_type, crawler, user, index)
-                token_dict = cls.get_token(log_type, crawler)
-                url = "https://mp.weixin.qq.com/cgi-bin/appmsg?"
-                headers = {
-                    "accept": "*/*",
-                    "accept-encoding": "gzip, deflate, br",
-                    "accept-language": "zh-CN,zh;q=0.9",
-                    "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
-                               "t=media/appmsg_edit_v2&action=edit&isNew=1"
-                               "&type=77&createType=5&token=" + str(token_dict['token']) + "&lang=zh_CN",
-                    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
-                    "sec-ch-ua-mobile": "?0",
-                    "sec-ch-ua-platform": '"Windows"',
-                    "sec-fetch-dest": "empty",
-                    "sec-fetch-mode": "cors",
-                    "sec-fetch-site": "same-origin",
-                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
-                                  " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
-                    "x-requested-with": "XMLHttpRequest",
-                    'cookie': token_dict['cookie'],
-                }
-                params = {
-                    "action": "list_ex",
-                    "begin": str(cls.begin),
-                    "count": "5",
-                    "fakeid": fakeid_dict['fakeid'],
-                    "type": "9",
-                    "query": "",
-                    "token": str(token_dict['token']),
-                    "lang": "zh_CN",
-                    "f": "json",
-                    "ajax": "1",
-                }
-                urllib3.disable_warnings()
-                # s = requests.session()
-                # # max_retries=3 重试3次
-                # s.mount('http://', HTTPAdapter(max_retries=3))
-                # s.mount('https://', HTTPAdapter(max_retries=3))
-                # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
-                r = requests.get(url=url, headers=headers, params=params, verify=False)
-                r.close()
-                if r.json()["base_resp"]["err_msg"] == "invalid session":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if r.json()["base_resp"]["err_msg"] == "freq control":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if 'app_msg_list' not in r.json():
-                    Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if len(r.json()['app_msg_list']) == 0:
-                    Common.logger(log_type, crawler).info('没有更多视频了\n')
-                    return
-                else:
-                    cls.begin += 5
-                    app_msg_list = r.json()['app_msg_list']
-                    for article_url in app_msg_list:
-                        # title
-                        if 'title' in article_url:
-                            title = article_url['title'].replace('/', '').replace('\n', '') \
-                                .replace('.', '').replace('“', '').replace('”', '').replace(' ', '')\
-                                .replace('"', '').replace("'", "")
-                        else:
-                            title = 0
-
-                        # aid
-                        if 'aid' in article_url:
-                            aid = article_url['aid']
-                        else:
-                            aid = 0
-
-                        # create_time
-                        if 'create_time' in article_url:
-                            create_time = article_url['create_time']
-                        else:
-                            create_time = 0
-                        publish_time_stamp = int(create_time)
-                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-
-                        avatar_url = fakeid_dict['head_url']
-
-                        # cover_url
-                        if 'cover' in article_url:
-                            cover_url = article_url['cover']
-                        else:
-                            cover_url = 0
-
-                        # article_url
-                        if 'link' in article_url:
-                            article_url = article_url['link']
-                        else:
-                            article_url = 0
-
-                        video_url = cls.get_video_url(log_type, crawler, article_url, env)
-
-                        video_dict = {
-                            'video_id': aid,
-                            'video_title': title,
-                            'publish_time_stamp': publish_time_stamp,
-                            'publish_time_str': publish_time_str,
-                            'user_name': user,
-                            'play_cnt': 0,
-                            'comment_cnt': 0,
-                            'like_cnt': 0,
-                            'share_cnt': 0,
-                            'user_id': fakeid_dict['fakeid'],
-                            'avatar_url': avatar_url,
-                            'cover_url': cover_url,
-                            'article_url': article_url,
-                            'video_url': video_url,
-                            'session': f'gongzhonghao-follow-{int(time.time())}'
-                        }
-                        for k, v in video_dict.items():
-                            Common.logger(log_type, crawler).info(f"{k}:{v}")
-                        if int(time.time()) - publish_time_stamp >= 3600 * 24 * 3:
-                            Common.logger(log_type, crawler).info(f'发布时间{publish_time_str} > 3 天\n')
-                            cls.begin = 0
-                            return
-                        cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
-
-                    Common.logger(log_type, crawler).info('休眠 60 秒\n')
-                    time.sleep(60)
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_videoList异常:{e}\n")
+        # try:
+        while True:
+            fakeid_dict = cls.get_fakeid(log_type, crawler, user, index)
+            token_dict = cls.get_token(log_type, crawler)
+            url = "https://mp.weixin.qq.com/cgi-bin/appmsg?"
+            headers = {
+                "accept": "*/*",
+                "accept-encoding": "gzip, deflate, br",
+                "accept-language": "zh-CN,zh;q=0.9",
+                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
+                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
+                           "&type=77&createType=5&token=" + str(token_dict['token']) + "&lang=zh_CN",
+                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
+                "sec-ch-ua-mobile": "?0",
+                "sec-ch-ua-platform": '"Windows"',
+                "sec-fetch-dest": "empty",
+                "sec-fetch-mode": "cors",
+                "sec-fetch-site": "same-origin",
+                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
+                "x-requested-with": "XMLHttpRequest",
+                'cookie': token_dict['cookie'],
+            }
+            params = {
+                "action": "list_ex",
+                "begin": str(cls.begin),
+                "count": "5",
+                "fakeid": fakeid_dict['fakeid'],
+                "type": "9",
+                "query": "",
+                "token": str(token_dict['token']),
+                "lang": "zh_CN",
+                "f": "json",
+                "ajax": "1",
+            }
+            urllib3.disable_warnings()
+            # s = requests.session()
+            # # max_retries=3 重试3次
+            # s.mount('http://', HTTPAdapter(max_retries=3))
+            # s.mount('https://', HTTPAdapter(max_retries=3))
+            # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
+            r = requests.get(url=url, headers=headers, params=params, verify=False)
+            r.close()
+            if r.json()["base_resp"]["err_msg"] == "invalid session":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if r.json()["base_resp"]["err_msg"] == "freq control":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if 'app_msg_list' not in r.json():
+                Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if len(r.json()['app_msg_list']) == 0:
+                Common.logger(log_type, crawler).info('没有更多视频了\n')
+                return
+            else:
+                cls.begin += 5
+                app_msg_list = r.json()['app_msg_list']
+                for article_url in app_msg_list:
+                    # title
+                    if 'title' in article_url:
+                        title = article_url['title'].replace('/', '').replace('\n', '') \
+                            .replace('.', '').replace('“', '').replace('”', '').replace(' ', '')\
+                            .replace('"', '').replace("'", "")
+                    else:
+                        title = 0
+
+                    # aid
+                    if 'aid' in article_url:
+                        aid = article_url['aid']
+                    else:
+                        aid = 0
+
+                    # create_time
+                    if 'create_time' in article_url:
+                        create_time = article_url['create_time']
+                    else:
+                        create_time = 0
+                    publish_time_stamp = int(create_time)
+                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
+
+                    avatar_url = fakeid_dict['head_url']
+
+                    # cover_url
+                    if 'cover' in article_url:
+                        cover_url = article_url['cover']
+                    else:
+                        cover_url = 0
+
+                    # article_url
+                    if 'link' in article_url:
+                        article_url = article_url['link']
+                    else:
+                        article_url = 0
+
+                    video_url = cls.get_video_url(article_url, env)
+
+                    video_dict = {
+                        'video_id': aid,
+                        'video_title': title,
+                        'publish_time_stamp': publish_time_stamp,
+                        'publish_time_str': publish_time_str,
+                        'user_name': user,
+                        'play_cnt': 0,
+                        'comment_cnt': 0,
+                        'like_cnt': 0,
+                        'share_cnt': 0,
+                        'user_id': fakeid_dict['fakeid'],
+                        'avatar_url': avatar_url,
+                        'cover_url': cover_url,
+                        'article_url': article_url,
+                        'video_url': video_url,
+                        'session': f'gongzhonghao-follow-{int(time.time())}'
+                    }
+                    for k, v in video_dict.items():
+                        Common.logger(log_type, crawler).info(f"{k}:{v}")
+                    if int(time.time()) - publish_time_stamp >= 3600 * 24 * 3:
+                        Common.logger(log_type, crawler).info(f'发布时间{publish_time_str} > 3 天\n')
+                        cls.begin = 0
+                        return
+                    cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
+
+                Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                time.sleep(60)
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"get_videoList异常:{e}\n")
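
The title cleanup above chains eight `.replace()` calls to strip slashes, quotes, dots, and whitespace before the title is used as a folder name. A compact equivalent, shown only as an illustration, keeps that character blacklist in one place:

```python
import re

# the same characters the chained .replace() calls strip from titles
_TITLE_JUNK = re.compile(r"[/\n.“”\"' ]")

def clean_title(raw_title):
    """Sketch: remove slashes, newlines, dots, quotes and spaces from a title."""
    return _TITLE_JUNK.sub("", raw_title)
```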
 
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, env):
@@ -369,133 +369,133 @@ class GongzhonghaoFollow2:
     # 下载/上传
     @classmethod
     def download_publish(cls, log_type, crawler, video_dict, oss_endpoint, env):
-        try:
-            if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
-                Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
-            # 标题敏感词过滤
-            elif any(word if word in video_dict['video_title'] else False for word in
-                     filter_word(log_type, crawler, "公众号", env)) is True:
-                Common.logger(log_type, crawler).info("标题已中过滤词\n")
-            # 已下载判断
-            elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
-                Common.logger(log_type, crawler).info("视频已下载\n")
-            # 标题相似度
-            elif cls.title_like(log_type, crawler, video_dict['video_title'], env) is True:
-                Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+        # try:
+        if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
+            Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
+        # 标题敏感词过滤
+        elif any(word in video_dict['video_title'] for word in
+                 filter_word(log_type, crawler, "公众号", env)):
+            Common.logger(log_type, crawler).info("标题已中过滤词\n")
+        # 已下载判断
+        elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
+            Common.logger(log_type, crawler).info("视频已下载\n")
+        # 标题相似度
+        elif cls.title_like(log_type, crawler, video_dict['video_title'], env) is True:
+            Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+        else:
+            # 下载视频
+            Common.download_method(log_type=log_type, crawler=crawler, text="video",
+                                   title=video_dict["video_title"], url=video_dict["video_url"])
+            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
+            # 获取视频时长
+            ffmpeg_dict = Common.ffmpeg(log_type, crawler,
+                                        f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
+            if ffmpeg_dict is None:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{md_title}")
+                Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                return
+            video_dict["video_width"] = ffmpeg_dict["width"]
+            video_dict["video_height"] = ffmpeg_dict["height"]
+            video_dict["duration"] = ffmpeg_dict["duration"]
+            video_size = ffmpeg_dict["size"]
+            Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
+            Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
+            Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
+            Common.logger(log_type, crawler).info(f'video_size:{video_size}')
+            # 视频size=0 或不满足抓取规则,直接删除
+            if int(video_size) == 0 or cls.download_rule(video_dict) is False:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{md_title}")
+                Common.logger(log_type, crawler).info("视频size=0或不满足抓取规则,删除成功\n")
+                return
+            # 下载封面
+            Common.download_method(log_type=log_type, crawler=crawler, text="cover",
+                                   title=video_dict["video_title"], url=video_dict["cover_url"])
+            # 保存视频信息至 "./videos/{video_title}/info.txt"
+            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
+
+            # 上传视频
+            Common.logger(log_type, crawler).info("开始上传视频...")
+            strategy = "定向爬虫策略"
+            our_video_id = Publish.upload_and_publish(log_type=log_type,
+                                                      crawler=crawler,
+                                                      strategy=strategy,
+                                                      our_uid="follow",
+                                                      oss_endpoint=oss_endpoint,
+                                                      env=env)
+            if env == 'prod':
+                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
             else:
-                # 下载视频
-                Common.download_method(log_type=log_type, crawler=crawler, text="video",
-                                       title=video_dict["video_title"], url=video_dict["video_url"])
-                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-                # 获取视频时长
-                ffmpeg_dict = Common.ffmpeg(log_type, crawler,
-                                            f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
-                if ffmpeg_dict is None:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                    Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-                    return
-                video_dict["video_width"] = ffmpeg_dict["width"]
-                video_dict["video_height"] = ffmpeg_dict["height"]
-                video_dict["duration"] = ffmpeg_dict["duration"]
-                video_size = ffmpeg_dict["size"]
-                Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
-                Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
-                Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
-                Common.logger(log_type, crawler).info(f'video_size:{video_size}')
-                # 视频size=0,直接删除
-                if int(video_size) == 0 or cls.download_rule(video_dict) is False:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                    Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-                    return
-                # 下载封面
-                Common.download_method(log_type=log_type, crawler=crawler, text="cover",
-                                       title=video_dict["video_title"], url=video_dict["cover_url"])
-                # 保存视频信息至 "./videos/{video_title}/info.txt"
-                Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-                # 上传视频
-                Common.logger(log_type, crawler).info("开始上传视频...")
-                strategy = "定向爬虫策略"
-                our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                          crawler=crawler,
-                                                          strategy=strategy,
-                                                          our_uid="follow",
-                                                          oss_endpoint=oss_endpoint,
-                                                          env=env)
-                if env == 'prod':
-                    our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
-                else:
-                    our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
-                Common.logger(log_type, crawler).info("视频上传完成")
-
-                if our_video_id is None:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-                    return
-
-                # 视频信息保存数据库
-                rule_dict = {
-                    "duration": {"min": 20, "max": 45 * 60},
-                    "publish_day": {"min": 3}
-                }
-
-                insert_sql = f""" insert into crawler_video(video_id,
-                                                            out_user_id,
-                                                            platform,
-                                                            strategy,
-                                                            out_video_id,
-                                                            video_title,
-                                                            cover_url,
-                                                            video_url,
-                                                            duration,
-                                                            publish_time,
-                                                            play_cnt,
-                                                            crawler_rule,
-                                                            width,
-                                                            height)
-                                                            values({our_video_id},
-                                                            "{video_dict['user_id']}",
-                                                            "{cls.platform}",
-                                                            "定向爬虫策略",
-                                                            "{video_dict['video_id']}",
-                                                            "{video_dict['video_title']}",
-                                                            "{video_dict['cover_url']}",
-                                                            "{video_dict['video_url']}",
-                                                            {int(video_dict['duration'])},
-                                                            "{video_dict['publish_time_str']}",
-                                                            {int(video_dict['play_cnt'])},
-                                                            '{json.dumps(rule_dict)}',
-                                                            {int(video_dict['video_width'])},
-                                                            {int(video_dict['video_height'])}) """
-                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-                MysqlHelper.update_values(log_type, crawler, insert_sql, env)
-                Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
-
-                # 视频写入飞书
-                Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
-                # 视频ID工作表,首行写入数据
-                upload_time = int(time.time())
-                values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                           "用户主页",
-                           video_dict['video_title'],
-                           video_dict['video_id'],
-                           our_video_link,
-                           int(video_dict['duration']),
-                           f"{video_dict['video_width']}*{video_dict['video_height']}",
-                           video_dict['publish_time_str'],
-                           video_dict['user_name'],
-                           video_dict['user_id'],
-                           video_dict['avatar_url'],
-                           video_dict['cover_url'],
-                           video_dict['article_url'],
-                           video_dict['video_url']]]
-                time.sleep(0.5)
-                Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
-                Common.logger(log_type, crawler).info('视频下载/上传成功\n')
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"download_publish异常:{e}\n")
+                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
+            Common.logger(log_type, crawler).info("视频上传完成")
+
+            if our_video_id is None:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
+                return
+
+            # 视频信息保存数据库
+            rule_dict = {
+                "duration": {"min": 20, "max": 45 * 60},
+                "publish_day": {"min": 3}
+            }
+
+            insert_sql = f""" insert into crawler_video(video_id,
+                                                        out_user_id,
+                                                        platform,
+                                                        strategy,
+                                                        out_video_id,
+                                                        video_title,
+                                                        cover_url,
+                                                        video_url,
+                                                        duration,
+                                                        publish_time,
+                                                        play_cnt,
+                                                        crawler_rule,
+                                                        width,
+                                                        height)
+                                                        values({our_video_id},
+                                                        "{video_dict['user_id']}",
+                                                        "{cls.platform}",
+                                                        "定向爬虫策略",
+                                                        "{video_dict['video_id']}",
+                                                        "{video_dict['video_title']}",
+                                                        "{video_dict['cover_url']}",
+                                                        "{video_dict['video_url']}",
+                                                        {int(video_dict['duration'])},
+                                                        "{video_dict['publish_time_str']}",
+                                                        {int(video_dict['play_cnt'])},
+                                                        '{json.dumps(rule_dict)}',
+                                                        {int(video_dict['video_width'])},
+                                                        {int(video_dict['video_height'])}) """
+            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+            MysqlHelper.update_values(log_type, crawler, insert_sql, env)
+            Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
+
+            # 视频写入飞书
+            Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
+            # 视频ID工作表,首行写入数据
+            upload_time = int(time.time())
+            values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
+                       "用户主页",
+                       video_dict['video_title'],
+                       video_dict['video_id'],
+                       our_video_link,
+                       int(video_dict['duration']),
+                       f"{video_dict['video_width']}*{video_dict['video_height']}",
+                       video_dict['publish_time_str'],
+                       video_dict['user_name'],
+                       video_dict['user_id'],
+                       video_dict['avatar_url'],
+                       video_dict['cover_url'],
+                       video_dict['article_url'],
+                       video_dict['video_url']]]
+            time.sleep(0.5)
+            Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
+            Common.logger(log_type, crawler).info('视频下载/上传成功\n')
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"download_publish异常:{e}\n")
 
     @classmethod
     def get_users(cls):
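The hunk above assembles the `crawler_video` INSERT by interpolating titles, URLs and the rule JSON straight into an f-string before handing it to `MysqlHelper.update_values`. As a point of comparison only, here is a minimal sketch of the same insert using driver-side placeholders; it assumes a raw pymysql connection object is available, which is not how the repository's `MysqlHelper` is called in this diff.

```python
# Hedged sketch: parameterized version of the crawler_video insert shown above.
# `conn` is assumed to be a pymysql connection; the repository's MysqlHelper takes a raw SQL string instead.
import json

def insert_crawler_video(conn, our_video_id, video_dict, platform, rule_dict):
    sql = """
        insert into crawler_video(video_id, out_user_id, platform, strategy,
                                  out_video_id, video_title, cover_url, video_url,
                                  duration, publish_time, play_cnt, crawler_rule,
                                  width, height)
        values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    args = (our_video_id,
            video_dict['user_id'],
            platform,
            "定向爬虫策略",
            video_dict['video_id'],
            video_dict['video_title'],
            video_dict['cover_url'],
            video_dict['video_url'],
            int(video_dict['duration']),
            video_dict['publish_time_str'],
            int(video_dict['play_cnt']),
            json.dumps(rule_dict),
            int(video_dict['video_width']),
            int(video_dict['video_height']))
    with conn.cursor() as cursor:
        cursor.execute(sql, args)  # every value is escaped by the driver
    conn.commit()
```

Placeholders avoid the quoting problems that titles or URLs containing quote characters would otherwise cause in the interpolated SQL string.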

+ 406 - 406
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_3.py

@@ -66,299 +66,299 @@ class GongzhonghaoFollow3:
     @classmethod
     def get_token(cls, log_type, crawler):
         while True:
-            try:
-                sheet = Feishu.get_values_batch(log_type, crawler, "l1VZki")
-                if sheet is None:
-                    time.sleep(1)
-                    continue
-                token = sheet[0][1]
-                cookie = sheet[1][1]
-                gzh_name = sheet[2][1]
-                gzh_time = sheet[3][1]
-                token_dict = {'token': token, 'cookie': cookie, 'gzh_name': gzh_name, 'gzh_time': gzh_time}
-                return token_dict
-            except Exception as e:
-                Common.logger(log_type, crawler).error(f"get_cookie_token异常:{e}\n")
+            # try:
+            sheet = Feishu.get_values_batch(log_type, crawler, "l1VZki")
+            if sheet is None:
+                time.sleep(1)
+                continue
+            token = sheet[0][1]
+            cookie = sheet[1][1]
+            gzh_name = sheet[2][1]
+            gzh_time = sheet[3][1]
+            token_dict = {'token': token, 'cookie': cookie, 'gzh_name': gzh_name, 'gzh_time': gzh_time}
+            return token_dict
+            # except Exception as e:
+            #     Common.logger(log_type, crawler).error(f"get_cookie_token异常:{e}\n")
 
     # 获取用户 fakeid
     @classmethod
     def get_fakeid(cls, log_type, crawler, user, index):
-        try:
-            while True:
-                token_dict = cls.get_token(log_type, crawler)
-                url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
-                headers = {
-                    "accept": "*/*",
-                    "accept-encoding": "gzip, deflate, br",
-                    "accept-language": "zh-CN,zh;q=0.9",
-                    "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
-                               "t=media/appmsg_edit_v2&action=edit&isNew=1"
-                               "&type=77&createType=5&token=1011071554&lang=zh_CN",
-                    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
-                    "sec-ch-ua-mobile": "?0",
-                    "sec-ch-ua-platform": '"Windows"',
-                    "sec-fetch-dest": "empty",
-                    "sec-fetch-mode": "cors",
-                    "sec-fetch-site": "same-origin",
-                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
-                                  " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
-                    "x-requested-with": "XMLHttpRequest",
-                    'cookie': token_dict['cookie'],
-                }
-                params = {
-                    "action": "search_biz",
-                    "begin": "0",
-                    "count": "5",
-                    "query": str(user),
-                    "token": token_dict['token'],
-                    "lang": "zh_CN",
-                    "f": "json",
-                    "ajax": "1",
-                }
-                urllib3.disable_warnings()
-                # s = requests.session()
-                # # max_retries=3 重试3次
-                # s.mount('http://', HTTPAdapter(max_retries=3))
-                # s.mount('https://', HTTPAdapter(max_retries=3))
-                # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
-                r = requests.get(url=url, headers=headers, params=params, verify=False)
-                r.close()
-                if r.json()["base_resp"]["err_msg"] == "invalid session":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"token_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if r.json()["base_resp"]["err_msg"] == "freq control":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if "list" not in r.json() or len(r.json()["list"]) == 0:
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                fakeid = r.json()["list"][int(index) - 1]["fakeid"]
-                head_url = r.json()["list"][int(index) - 1]["round_head_img"]
-                fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
-                return fakeid_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_fakeid异常:{e}\n")
+        # try:
+        while True:
+            token_dict = cls.get_token(log_type, crawler)
+            url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
+            headers = {
+                "accept": "*/*",
+                "accept-encoding": "gzip, deflate, br",
+                "accept-language": "zh-CN,zh;q=0.9",
+                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
+                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
+                           "&type=77&createType=5&token=1011071554&lang=zh_CN",
+                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
+                "sec-ch-ua-mobile": "?0",
+                "sec-ch-ua-platform": '"Windows"',
+                "sec-fetch-dest": "empty",
+                "sec-fetch-mode": "cors",
+                "sec-fetch-site": "same-origin",
+                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
+                "x-requested-with": "XMLHttpRequest",
+                'cookie': token_dict['cookie'],
+            }
+            params = {
+                "action": "search_biz",
+                "begin": "0",
+                "count": "5",
+                "query": str(user),
+                "token": token_dict['token'],
+                "lang": "zh_CN",
+                "f": "json",
+                "ajax": "1",
+            }
+            urllib3.disable_warnings()
+            # s = requests.session()
+            # # max_retries=3 重试3次
+            # s.mount('http://', HTTPAdapter(max_retries=3))
+            # s.mount('https://', HTTPAdapter(max_retries=3))
+            # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
+            r = requests.get(url=url, headers=headers, params=params, verify=False)
+            r.close()
+            if r.json()["base_resp"]["err_msg"] == "invalid session":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"token_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if r.json()["base_resp"]["err_msg"] == "freq control":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if "list" not in r.json() or len(r.json()["list"]) == 0:
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            fakeid = r.json()["list"][int(index) - 1]["fakeid"]
+            head_url = r.json()["list"][int(index) - 1]["round_head_img"]
+            fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
+            return fakeid_dict
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"get_fakeid异常:{e}\n")
 
     # 获取腾讯视频下载链接
     @classmethod
-    def get_tencent_video_url(cls, log_type, crawler, video_id):
-        try:
-            url = 'https://vv.video.qq.com/getinfo?vids=' + str(video_id) + '&platform=101001&charge=0&otype=json'
-            response = requests.get(url=url).text.replace('QZOutputJson=', '').replace('"};', '"}')
-            response = json.loads(response)
-            url = response['vl']['vi'][0]['ul']['ui'][0]['url']
-            fvkey = response['vl']['vi'][0]['fvkey']
-            video_url = url + str(video_id) + '.mp4?vkey=' + fvkey
-            return video_url
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_tencent_video_url异常:{e}\n")
+    def get_tencent_video_url(cls, video_id):
+        # try:
+        url = 'https://vv.video.qq.com/getinfo?vids=' + str(video_id) + '&platform=101001&charge=0&otype=json'
+        response = requests.get(url=url).text.replace('QZOutputJson=', '').replace('"};', '"}')
+        response = json.loads(response)
+        url = response['vl']['vi'][0]['ul']['ui'][0]['url']
+        fvkey = response['vl']['vi'][0]['fvkey']
+        video_url = url + str(video_id) + '.mp4?vkey=' + fvkey
+        return video_url
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"get_tencent_video_url异常:{e}\n")
 
     @classmethod
-    def get_video_url(cls, log_type, crawler, article_url, env):
-        try:
-            # 打印请求配置
-            ca = DesiredCapabilities.CHROME
-            ca["goog:loggingPrefs"] = {"performance": "ALL"}
-
-            # 不打开浏览器运行
-            chrome_options = webdriver.ChromeOptions()
-            chrome_options.add_argument("headless")
-            chrome_options.add_argument(
-                f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-            chrome_options.add_argument("--no-sandbox")
-
-            # driver初始化
-            if env == "prod":
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
-            else:
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
-                    '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
-
-            driver.implicitly_wait(10)
-            # Common.logger(log_type, crawler).info('打开文章链接')
-            driver.get(article_url)
-            time.sleep(1)
-
-            if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
-                video_url = driver.find_element(
-                    By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]').get_attribute('src')
-            elif len(driver.find_elements(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]')) != 0:
-                iframe = driver.find_element(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]').get_attribute(
-                    'src')
-                video_id = iframe.split('vid=')[-1].split('&')[0]
-                video_url = cls.get_tencent_video_url(log_type, crawler, video_id)
-            else:
-                video_url = 0
-            driver.quit()
-            return video_url
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f'get_video_url异常:{e}\n')
+    def get_video_url(cls, article_url, env):
+        # try:
+        # 打印请求配置
+        ca = DesiredCapabilities.CHROME
+        ca["goog:loggingPrefs"] = {"performance": "ALL"}
+
+        # 不打开浏览器运行
+        chrome_options = webdriver.ChromeOptions()
+        chrome_options.add_argument("headless")
+        chrome_options.add_argument(
+            f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+        chrome_options.add_argument("--no-sandbox")
+
+        # driver初始化
+        if env == "prod":
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
+        else:
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
+                '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
+
+        driver.implicitly_wait(10)
+        # Common.logger(log_type, crawler).info('打开文章链接')
+        driver.get(article_url)
+        time.sleep(1)
+
+        if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
+            video_url = driver.find_element(
+                By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]').get_attribute('src')
+        elif len(driver.find_elements(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]')) != 0:
+            iframe = driver.find_element(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]').get_attribute(
+                'src')
+            video_id = iframe.split('vid=')[-1].split('&')[0]
+            video_url = cls.get_tencent_video_url(video_id)
+        else:
+            video_url = 0
+        driver.quit()
+        return video_url
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).info(f'get_video_url异常:{e}\n')
 
     # 获取文章列表
     @classmethod
     def get_videoList(cls, log_type, crawler, user, index, oss_endpoint, env):
-        try:
-            while True:
-                fakeid_dict = cls.get_fakeid(log_type, crawler, user, index)
-                token_dict = cls.get_token(log_type, crawler)
-                url = "https://mp.weixin.qq.com/cgi-bin/appmsg?"
-                headers = {
-                    "accept": "*/*",
-                    "accept-encoding": "gzip, deflate, br",
-                    "accept-language": "zh-CN,zh;q=0.9",
-                    "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
-                               "t=media/appmsg_edit_v2&action=edit&isNew=1"
-                               "&type=77&createType=5&token=" + str(token_dict['token']) + "&lang=zh_CN",
-                    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
-                    "sec-ch-ua-mobile": "?0",
-                    "sec-ch-ua-platform": '"Windows"',
-                    "sec-fetch-dest": "empty",
-                    "sec-fetch-mode": "cors",
-                    "sec-fetch-site": "same-origin",
-                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
-                                  " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
-                    "x-requested-with": "XMLHttpRequest",
-                    'cookie': token_dict['cookie'],
-                }
-                params = {
-                    "action": "list_ex",
-                    "begin": str(cls.begin),
-                    "count": "5",
-                    "fakeid": fakeid_dict['fakeid'],
-                    "type": "9",
-                    "query": "",
-                    "token": str(token_dict['token']),
-                    "lang": "zh_CN",
-                    "f": "json",
-                    "ajax": "1",
-                }
-                urllib3.disable_warnings()
-                # s = requests.session()
-                # # max_retries=3 重试3次
-                # s.mount('http://', HTTPAdapter(max_retries=3))
-                # s.mount('https://', HTTPAdapter(max_retries=3))
-                # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
-                r = requests.get(url=url, headers=headers, params=params, verify=False)
-                r.close()
-                if r.json()["base_resp"]["err_msg"] == "invalid session":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"token_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if r.json()["base_resp"]["err_msg"] == "freq control":
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if 'app_msg_list' not in r.json():
-                    Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-                    Common.logger(log_type, crawler).warning(
-                        f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
-                    if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    continue
-                if len(r.json()['app_msg_list']) == 0:
-                    Common.logger(log_type, crawler).info('没有更多视频了\n')
-                    return
-                else:
-                    cls.begin += 5
-                    app_msg_list = r.json()['app_msg_list']
-                    for article_url in app_msg_list:
-                        # title
-                        if 'title' in article_url:
-                            title = article_url['title'].replace('/', '').replace('\n', '') \
-                                .replace('.', '').replace('“', '').replace('”', '').replace(' ', '')\
-                                .replace('"', '').replace("'", "")
-                        else:
-                            title = 0
-
-                        # aid
-                        if 'aid' in article_url:
-                            aid = article_url['aid']
-                        else:
-                            aid = 0
-
-                        # create_time
-                        if 'create_time' in article_url:
-                            create_time = article_url['create_time']
-                        else:
-                            create_time = 0
-                        publish_time_stamp = int(create_time)
-                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-
-                        avatar_url = fakeid_dict['head_url']
-
-                        # cover_url
-                        if 'cover' in article_url:
-                            cover_url = article_url['cover']
-                        else:
-                            cover_url = 0
-
-                        # article_url
-                        if 'link' in article_url:
-                            article_url = article_url['link']
-                        else:
-                            article_url = 0
-
-                        video_url = cls.get_video_url(log_type, crawler, article_url, env)
-
-                        video_dict = {
-                            'video_id': aid,
-                            'video_title': title,
-                            'publish_time_stamp': publish_time_stamp,
-                            'publish_time_str': publish_time_str,
-                            'user_name': user,
-                            'play_cnt': 0,
-                            'comment_cnt': 0,
-                            'like_cnt': 0,
-                            'share_cnt': 0,
-                            'user_id': fakeid_dict['fakeid'],
-                            'avatar_url': avatar_url,
-                            'cover_url': cover_url,
-                            'article_url': article_url,
-                            'video_url': video_url,
-                            'session': f'gongzhonghao-follow-{int(time.time())}'
-                        }
-                        for k, v in video_dict.items():
-                            Common.logger(log_type, crawler).info(f"{k}:{v}")
-                        if int(time.time()) - publish_time_stamp >= 3600 * 24 * 3:
-                            Common.logger(log_type, crawler).info(f'发布时间{publish_time_str} > 3 天\n')
-                            cls.begin = 0
-                            return
-                        cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
-
-                    Common.logger(log_type, crawler).info('休眠 60 秒\n')
-                    time.sleep(60)
-        except Exception as e:
-            Common.logger(log_type, crawler).error("get_videoList异常:{}\n", e)
+        # try:
+        while True:
+            fakeid_dict = cls.get_fakeid(log_type, crawler, user, index)
+            token_dict = cls.get_token(log_type, crawler)
+            url = "https://mp.weixin.qq.com/cgi-bin/appmsg?"
+            headers = {
+                "accept": "*/*",
+                "accept-encoding": "gzip, deflate, br",
+                "accept-language": "zh-CN,zh;q=0.9",
+                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
+                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
+                           "&type=77&createType=5&token=" + str(token_dict['token']) + "&lang=zh_CN",
+                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
+                "sec-ch-ua-mobile": "?0",
+                "sec-ch-ua-platform": '"Windows"',
+                "sec-fetch-dest": "empty",
+                "sec-fetch-mode": "cors",
+                "sec-fetch-site": "same-origin",
+                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
+                "x-requested-with": "XMLHttpRequest",
+                'cookie': token_dict['cookie'],
+            }
+            params = {
+                "action": "list_ex",
+                "begin": str(cls.begin),
+                "count": "5",
+                "fakeid": fakeid_dict['fakeid'],
+                "type": "9",
+                "query": "",
+                "token": str(token_dict['token']),
+                "lang": "zh_CN",
+                "f": "json",
+                "ajax": "1",
+            }
+            urllib3.disable_warnings()
+            # s = requests.session()
+            # # max_retries=3 重试3次
+            # s.mount('http://', HTTPAdapter(max_retries=3))
+            # s.mount('https://', HTTPAdapter(max_retries=3))
+            # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
+            r = requests.get(url=url, headers=headers, params=params, verify=False)
+            r.close()
+            if r.json()["base_resp"]["err_msg"] == "invalid session":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"token_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if r.json()["base_resp"]["err_msg"] == "freq control":
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if 'app_msg_list' not in r.json():
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(
+                    f"公众号_3:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
+                if 20 >= datetime.datetime.now().hour >= 10:
+                    Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                continue
+            if len(r.json()['app_msg_list']) == 0:
+                Common.logger(log_type, crawler).info('没有更多视频了\n')
+                return
+            else:
+                cls.begin += 5
+                app_msg_list = r.json()['app_msg_list']
+                for article_url in app_msg_list:
+                    # title
+                    if 'title' in article_url:
+                        title = article_url['title'].replace('/', '').replace('\n', '') \
+                            .replace('.', '').replace('“', '').replace('”', '').replace(' ', '')\
+                            .replace('"', '').replace("'", "")
+                    else:
+                        title = 0
+
+                    # aid
+                    if 'aid' in article_url:
+                        aid = article_url['aid']
+                    else:
+                        aid = 0
+
+                    # create_time
+                    if 'create_time' in article_url:
+                        create_time = article_url['create_time']
+                    else:
+                        create_time = 0
+                    publish_time_stamp = int(create_time)
+                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
+
+                    avatar_url = fakeid_dict['head_url']
+
+                    # cover_url
+                    if 'cover' in article_url:
+                        cover_url = article_url['cover']
+                    else:
+                        cover_url = 0
+
+                    # article_url
+                    if 'link' in article_url:
+                        article_url = article_url['link']
+                    else:
+                        article_url = 0
+
+                    video_url = cls.get_video_url(article_url, env)
+
+                    video_dict = {
+                        'video_id': aid,
+                        'video_title': title,
+                        'publish_time_stamp': publish_time_stamp,
+                        'publish_time_str': publish_time_str,
+                        'user_name': user,
+                        'play_cnt': 0,
+                        'comment_cnt': 0,
+                        'like_cnt': 0,
+                        'share_cnt': 0,
+                        'user_id': fakeid_dict['fakeid'],
+                        'avatar_url': avatar_url,
+                        'cover_url': cover_url,
+                        'article_url': article_url,
+                        'video_url': video_url,
+                        'session': f'gongzhonghao-follow-{int(time.time())}'
+                    }
+                    for k, v in video_dict.items():
+                        Common.logger(log_type, crawler).info(f"{k}:{v}")
+                    if int(time.time()) - publish_time_stamp >= 3600 * 24 * 3:
+                        Common.logger(log_type, crawler).info(f'发布时间{publish_time_str} > 3 天\n')
+                        cls.begin = 0
+                        return
+                    cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
+
+                Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                time.sleep(60)
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error("get_videoList异常:{}\n", e)
 
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, env):
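`get_tencent_video_url` in the hunk above turns the Tencent `vid` found in the article's iframe into a direct `.mp4` link via the public `getinfo` endpoint and the `fvkey` field of its response. The standalone sketch below mirrors that flow; the endpoint, the `QZOutputJson=` wrapper and the response fields are taken from the code above, while the timeout and the defensive error handling are additions for illustration only.

```python
# Sketch of the vid -> mp4 resolution used above, with a timeout and guards added for illustration.
import json
import requests

def resolve_tencent_video_url(video_id):
    url = ('https://vv.video.qq.com/getinfo?vids=' + str(video_id)
           + '&platform=101001&charge=0&otype=json')
    try:
        raw = requests.get(url=url, timeout=10).text
        # The endpoint wraps its JSON as: QZOutputJson={...};
        body = json.loads(raw.replace('QZOutputJson=', '').rstrip(';'))
        vi = body['vl']['vi'][0]
        return vi['ul']['ui'][0]['url'] + str(video_id) + '.mp4?vkey=' + vi['fvkey']
    except (requests.RequestException, ValueError, KeyError, IndexError):
        return None  # callers can treat None like the 0 sentinel used in get_video_url
```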
@@ -369,133 +369,133 @@ class GongzhonghaoFollow3:
     # 下载/上传
     @classmethod
     def download_publish(cls, log_type, crawler, video_dict, oss_endpoint, env):
-        try:
-            if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
-                Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
-            # 标题敏感词过滤
-            elif any(word if word in video_dict['video_title'] else False for word in
-                     filter_word(log_type, crawler, "公众号", env)) is True:
-                Common.logger(log_type, crawler).info("标题已中过滤词\n")
-            # 已下载判断
-            elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
-                Common.logger(log_type, crawler).info("视频已下载\n")
-            # 标题相似度
-            elif cls.title_like(log_type, crawler, video_dict['video_title'], env) is True:
-                Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+        # try:
+        if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
+            Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
+        # 标题敏感词过滤
+        elif any(word if word in video_dict['video_title'] else False for word in
+                 filter_word(log_type, crawler, "公众号", env)) is True:
+            Common.logger(log_type, crawler).info("标题已中过滤词\n")
+        # 已下载判断
+        elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
+            Common.logger(log_type, crawler).info("视频已下载\n")
+        # 标题相似度
+        elif cls.title_like(log_type, crawler, video_dict['video_title'], env) is True:
+            Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+        else:
+            # 下载视频
+            Common.download_method(log_type=log_type, crawler=crawler, text="video",
+                                   title=video_dict["video_title"], url=video_dict["video_url"])
+            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
+            # 获取视频时长
+            ffmpeg_dict = Common.ffmpeg(log_type, crawler,
+                                        f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
+            if ffmpeg_dict is None:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{md_title}")
+                Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                return
+            video_dict["video_width"] = ffmpeg_dict["width"]
+            video_dict["video_height"] = ffmpeg_dict["height"]
+            video_dict["duration"] = ffmpeg_dict["duration"]
+            video_size = ffmpeg_dict["size"]
+            Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
+            Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
+            Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
+            Common.logger(log_type, crawler).info(f'video_size:{video_size}')
+            # 视频size=0,直接删除
+            if int(video_size) == 0 or cls.download_rule(video_dict) is False:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{md_title}")
+                Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                return
+            # 下载封面
+            Common.download_method(log_type=log_type, crawler=crawler, text="cover",
+                                   title=video_dict["video_title"], url=video_dict["cover_url"])
+            # 保存视频信息至 "./videos/{video_title}/info.txt"
+            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
+
+            # 上传视频
+            Common.logger(log_type, crawler).info("开始上传视频...")
+            strategy = "定向爬虫策略"
+            our_video_id = Publish.upload_and_publish(log_type=log_type,
+                                                      crawler=crawler,
+                                                      strategy=strategy,
+                                                      our_uid="follow",
+                                                      oss_endpoint=oss_endpoint,
+                                                      env=env)
+            if env == 'prod':
+                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
             else:
-                # 下载视频
-                Common.download_method(log_type=log_type, crawler=crawler, text="video",
-                                       title=video_dict["video_title"], url=video_dict["video_url"])
-                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
-                # 获取视频时长
-                ffmpeg_dict = Common.ffmpeg(log_type, crawler,
-                                            f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
-                if ffmpeg_dict is None:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                    Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-                    return
-                video_dict["video_width"] = ffmpeg_dict["width"]
-                video_dict["video_height"] = ffmpeg_dict["height"]
-                video_dict["duration"] = ffmpeg_dict["duration"]
-                video_size = ffmpeg_dict["size"]
-                Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
-                Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
-                Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
-                Common.logger(log_type, crawler).info(f'video_size:{video_size}')
-                # 视频size=0,直接删除
-                if int(video_size) == 0 or cls.download_rule(video_dict) is False:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
-                    Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
-                    return
-                # 下载封面
-                Common.download_method(log_type=log_type, crawler=crawler, text="cover",
-                                       title=video_dict["video_title"], url=video_dict["cover_url"])
-                # 保存视频信息至 "./videos/{video_title}/info.txt"
-                Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-                # 上传视频
-                Common.logger(log_type, crawler).info("开始上传视频...")
-                strategy = "定向爬虫策略"
-                our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                          crawler=crawler,
-                                                          strategy=strategy,
-                                                          our_uid="follow",
-                                                          oss_endpoint=oss_endpoint,
-                                                          env=env)
-                if env == 'prod':
-                    our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
-                else:
-                    our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
-                Common.logger(log_type, crawler).info("视频上传完成")
-
-                if our_video_id is None:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-                    return
-
-                # 视频信息保存数据库
-                rule_dict = {
-                    "duration": {"min": 20, "max": 45 * 60},
-                    "publish_day": {"min": 3}
-                }
-
-                insert_sql = f""" insert into crawler_video(video_id,
-                                                            out_user_id,
-                                                            platform,
-                                                            strategy,
-                                                            out_video_id,
-                                                            video_title,
-                                                            cover_url,
-                                                            video_url,
-                                                            duration,
-                                                            publish_time,
-                                                            play_cnt,
-                                                            crawler_rule,
-                                                            width,
-                                                            height)
-                                                            values({our_video_id},
-                                                            "{video_dict['user_id']}",
-                                                            "{cls.platform}",
-                                                            "定向爬虫策略",
-                                                            "{video_dict['video_id']}",
-                                                            "{video_dict['video_title']}",
-                                                            "{video_dict['cover_url']}",
-                                                            "{video_dict['video_url']}",
-                                                            {int(video_dict['duration'])},
-                                                            "{video_dict['publish_time_str']}",
-                                                            {int(video_dict['play_cnt'])},
-                                                            '{json.dumps(rule_dict)}',
-                                                            {int(video_dict['video_width'])},
-                                                            {int(video_dict['video_height'])}) """
-                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-                MysqlHelper.update_values(log_type, crawler, insert_sql, env)
-                Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
-
-                # 视频写入飞书
-                Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
-                # 视频ID工作表,首行写入数据
-                upload_time = int(time.time())
-                values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                           "用户主页",
-                           video_dict['video_title'],
-                           video_dict['video_id'],
-                           our_video_link,
-                           int(video_dict['duration']),
-                           f"{video_dict['video_width']}*{video_dict['video_height']}",
-                           video_dict['publish_time_str'],
-                           video_dict['user_name'],
-                           video_dict['user_id'],
-                           video_dict['avatar_url'],
-                           video_dict['cover_url'],
-                           video_dict['article_url'],
-                           video_dict['video_url']]]
-                time.sleep(0.5)
-                Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
-                Common.logger(log_type, crawler).info('视频下载/上传成功\n')
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"download_publish异常:{e}\n")
+                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
+            Common.logger(log_type, crawler).info("视频上传完成")
+
+            if our_video_id is None:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
+                return
+
+            # 视频信息保存数据库
+            rule_dict = {
+                "duration": {"min": 20, "max": 45 * 60},
+                "publish_day": {"min": 3}
+            }
+
+            insert_sql = f""" insert into crawler_video(video_id,
+                                                        out_user_id,
+                                                        platform,
+                                                        strategy,
+                                                        out_video_id,
+                                                        video_title,
+                                                        cover_url,
+                                                        video_url,
+                                                        duration,
+                                                        publish_time,
+                                                        play_cnt,
+                                                        crawler_rule,
+                                                        width,
+                                                        height)
+                                                        values({our_video_id},
+                                                        "{video_dict['user_id']}",
+                                                        "{cls.platform}",
+                                                        "定向爬虫策略",
+                                                        "{video_dict['video_id']}",
+                                                        "{video_dict['video_title']}",
+                                                        "{video_dict['cover_url']}",
+                                                        "{video_dict['video_url']}",
+                                                        {int(video_dict['duration'])},
+                                                        "{video_dict['publish_time_str']}",
+                                                        {int(video_dict['play_cnt'])},
+                                                        '{json.dumps(rule_dict)}',
+                                                        {int(video_dict['video_width'])},
+                                                        {int(video_dict['video_height'])}) """
+            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+            MysqlHelper.update_values(log_type, crawler, insert_sql, env)
+            Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
+
+            # 视频写入飞书
+            Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
+            # 视频ID工作表,首行写入数据
+            upload_time = int(time.time())
+            values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
+                       "用户主页",
+                       video_dict['video_title'],
+                       video_dict['video_id'],
+                       our_video_link,
+                       int(video_dict['duration']),
+                       f"{video_dict['video_width']}*{video_dict['video_height']}",
+                       video_dict['publish_time_str'],
+                       video_dict['user_name'],
+                       video_dict['user_id'],
+                       video_dict['avatar_url'],
+                       video_dict['cover_url'],
+                       video_dict['article_url'],
+                       video_dict['video_url']]]
+            time.sleep(0.5)
+            Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
+            Common.logger(log_type, crawler).info('视频下载/上传成功\n')
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"download_publish异常:{e}\n")
 
     @classmethod
     def get_users(cls):
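Most of the change in this file is the same pattern: the per-method try/except wrappers are commented out, so exceptions now surface instead of being collapsed into a single log line. If the goal is to keep full tracebacks while still writing them to the crawler log, one possible middle ground (a sketch only, not what this commit does; it assumes the `Common.logger(log_type, crawler)` signature used throughout the diff) is a log-and-reraise decorator:

```python
# Sketch: log the exception with the crawler's logger, then re-raise so the traceback is preserved.
# Assumes methods follow the (cls, log_type, crawler, ...) calling convention seen in this file.
import functools
from common.common import Common

def log_exceptions(func):
    @functools.wraps(func)
    def wrapper(cls, log_type, crawler, *args, **kwargs):
        try:
            return func(cls, log_type, crawler, *args, **kwargs)
        except Exception as e:
            Common.logger(log_type, crawler).error(f"{func.__name__}异常:{e}\n")
            raise
    return wrapper
```

Applied as `@classmethod` over `@log_exceptions`, this keeps the existing log format without hiding where the failure happened.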

+ 16 - 16
gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py

@@ -14,22 +14,22 @@ from gongzhonghao.gongzhonghao_follow.gongzhonghao_follow_2 import GongzhonghaoF
 class Main:
     @classmethod
     def main(cls, log_type, crawler, env):
-        try:
-            if env == "dev":
-                oss_endpoint = "out"
-            else:
-                oss_endpoint = "inner"
-            Common.logger(log_type, crawler).info('开始抓取公众号视频\n')
-            GongzhonghaoFollow2.get_all_videos(log_type=log_type,
-                                               crawler=crawler,
-                                               oss_endpoint=oss_endpoint,
-                                               env=env)
-            Common.del_logs(log_type, crawler)
-            GongzhonghaoFollow2.begin = 0
-            Common.logger(log_type, crawler).info('公众号抓取一轮完毕,休眠 8 小时\n')
-            time.sleep(3600*8)
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f"公众号抓取异常:{e}\n")
+        # try:
+        if env == "dev":
+            oss_endpoint = "out"
+        else:
+            oss_endpoint = "inner"
+        Common.logger(log_type, crawler).info('开始抓取公众号视频\n')
+        GongzhonghaoFollow2.get_all_videos(log_type=log_type,
+                                           crawler=crawler,
+                                           oss_endpoint=oss_endpoint,
+                                           env=env)
+        Common.del_logs(log_type, crawler)
+        GongzhonghaoFollow2.begin = 0
+        Common.logger(log_type, crawler).info('公众号抓取一轮完毕,休眠 8 小时\n')
+        time.sleep(3600*8)
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).info(f"公众号抓取异常:{e}\n")
 
 
 if __name__ == '__main__':

+ 16 - 16
gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py

@@ -14,22 +14,22 @@ from gongzhonghao.gongzhonghao_follow.gongzhonghao_follow_3 import GongzhonghaoF
 class Main:
     @classmethod
     def main(cls, log_type, crawler, env):
-        try:
-            if env == "dev":
-                oss_endpoint = "out"
-            else:
-                oss_endpoint = "inner"
-            Common.logger(log_type, crawler).info('开始抓取公众号视频\n')
-            GongzhonghaoFollow3.get_all_videos(log_type=log_type,
-                                               crawler=crawler,
-                                               oss_endpoint=oss_endpoint,
-                                               env=env)
-            Common.del_logs(log_type, crawler)
-            GongzhonghaoFollow3.begin = 0
-            Common.logger(log_type, crawler).info('公众号抓取一轮完毕,休眠 8 小时\n')
-            time.sleep(3600*8)
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f"公众号抓取异常:{e}\n")
+        # try:
+        if env == "dev":
+            oss_endpoint = "out"
+        else:
+            oss_endpoint = "inner"
+        Common.logger(log_type, crawler).info('开始抓取公众号视频\n')
+        GongzhonghaoFollow3.get_all_videos(log_type=log_type,
+                                           crawler=crawler,
+                                           oss_endpoint=oss_endpoint,
+                                           env=env)
+        Common.del_logs(log_type, crawler)
+        GongzhonghaoFollow3.begin = 0
+        Common.logger(log_type, crawler).info('公众号抓取一轮完毕,休眠 8 小时\n')
+        time.sleep(3600*8)
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).info(f"公众号抓取异常:{e}\n")
 
 
 if __name__ == '__main__':

BIN
xiaoniangao/.DS_Store → jixiangxingfu/.DS_Store


+ 3 - 0
jixiangxingfu/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17

+ 3 - 0
jixiangxingfu/jixiangxingfu_main/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17

+ 25 - 0
jixiangxingfu/jixiangxingfu_main/run_jixiangxingfu_recommend.py

@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17
+import argparse
+import os
+import sys
+sys.path.append(os.getcwd())
+from common.common import Common
+from jixiangxingfu.jixiangxingfu_recommend.jixiangxingfu_recommend import JixiangxingfuRecommend
+
+
+def main(log_type, crawler, env):
+    Common.logger(log_type, crawler).info('开始抓取 吉祥幸福小程序\n')
+    JixiangxingfuRecommend.start_wechat(log_type, crawler, env)
+    Common.del_logs(log_type, crawler)
+    Common.logger(log_type, crawler).info('抓取完一轮\n')
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  # 新建参数解析器对象
+    parser.add_argument('--log_type', type=str)  # 添加参数,注明参数类型
+    parser.add_argument('--crawler')  # 添加参数
+    parser.add_argument('--env')  # 添加参数
+    args = parser.parse_args()  # 解析参数,也可以通过终端赋值
+    main(log_type=args.log_type, crawler=args.crawler, env=args.env)
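
This entry point is normally launched by `process_offline.sh` with `--log_type/--crawler/--env` flags. As a sketch only, the same CLI can be exercised from Python; the flag values below mirror the shell wrapper and are examples, and the working directory is assumed to be the repo root with all crawler dependencies installed:

```python
# Sketch: drive run_jixiangxingfu_recommend.py's argparse CLI from Python,
# mirroring the flags process_offline.sh passes. Assumes repo root as cwd.
import runpy
import sys

sys.argv = [
    "run_jixiangxingfu_recommend.py",
    "--log_type", "recommend",
    "--crawler", "jixiangxingfu",
    "--env", "dev",
]
runpy.run_path(
    "jixiangxingfu/jixiangxingfu_main/run_jixiangxingfu_recommend.py",
    run_name="__main__",
)
```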

BIN
jixiangxingfu/jixiangxingfu_recommend/.DS_Store


+ 3 - 0
jixiangxingfu/jixiangxingfu_recommend/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17

+ 161 - 0
jixiangxingfu/jixiangxingfu_recommend/insert.py

@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17
+import json
+import os
+import sys
+import time
+from datetime import date, timedelta
+from hashlib import md5
+
+sys.path.append(os.getcwd())
+from common.common import Common
+from common.feishu import Feishu
+from common.scheduling_db import MysqlHelper
+
+
+class Insert:
+    @classmethod
+    def get_config(cls, log_type, crawler, text, env):
+        select_sql = f"""select * from crawler_config where source="benshanzhufu" """
+        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
+        title_list = []
+        filter_list = []
+        for content in contents:
+            config = content['config']
+            config_dict = eval(config)
+            for k, v in config_dict.items():
+                if k == "title":
+                    title_list_config = v.split(",")
+                    for title in title_list_config:
+                        title_list.append(title)
+                if k == "filter":
+                    filter_list_config = v.split(",")
+                    for filter_word in filter_list_config:
+                        filter_list.append(filter_word)
+        if text == "title":
+            return title_list
+        elif text == "filter":
+            return filter_list
+
+    @classmethod
+    def before_day(cls):
+        publish_time_str_rule = (date.today() + timedelta(days=-30)).strftime("%Y-%m-%d %H:%M:%S")
+        publish_time_stamp_rule = int(time.mktime(time.strptime(publish_time_str_rule, "%Y-%m-%d %H:%M:%S")))
+        print(publish_time_str_rule)
+        print(publish_time_stamp_rule)
+
+    @classmethod
+    def insert_config(cls, log_type, crawler, env):
+        filter_sheet = Feishu.get_values_batch(log_type, crawler, "DjXfqG")
+        # title_sheet = Feishu.get_values_batch(log_type, crawler, "bHSW1p")
+        filter_list = []
+        # title_list = []
+        for x in filter_sheet:
+            for y in x:
+                if y is None:
+                    pass
+                else:
+                    filter_list.append(y)
+        # for x in title_sheet:
+        #     for y in x:
+        #         if y is None:
+        #             pass
+        #         else:
+        #             title_list.append(y)
+        # str_title = ','.join(title_list)
+        str_filter = ','.join(filter_list)
+        config_dict = {
+            # "title": str_title,
+            "filter": str_filter
+        }
+        str_config_dict = str(config_dict)
+        # print(f"config_dict:{config_dict}")
+        # print(f"str_config_dict:{str_config_dict}")
+        insert_sql = f""" insert into crawler_config(title, source, config) values("本山祝福小程序", "benshanzhufu", "{str_config_dict}") """
+        MysqlHelper.update_values(log_type, crawler, insert_sql, env)
+
+    @classmethod
+    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env):
+        jixiangxingfu_sheetid = ['d9e9b1']
+        for sheetid in jixiangxingfu_sheetid:
+            xiaoniangao_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
+            for i in range(1, len(xiaoniangao_sheet)):
+            # for i in range(1, 5):
+                if xiaoniangao_sheet[i][5] is None or xiaoniangao_sheet[i][7] is None:
+                    continue
+                video_id = xiaoniangao_sheet[i][12].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace(
+                    "/info", "")
+                if video_id == "None":
+                    continue
+                video_id = int(video_id)
+                out_user_id = "jixiangxingfu"
+                platform = "吉祥幸福"
+                strategy = "推荐榜爬虫策略"
+                video_title = str(xiaoniangao_sheet[i][7])
+                play_cnt = int(xiaoniangao_sheet[i][9].split("万")[0])*10000
+                duration = str(xiaoniangao_sheet[i][10])
+                width = int(xiaoniangao_sheet[i][11].split("*")[0])
+                height = int(xiaoniangao_sheet[i][11].split("*")[1])
+                cover_url = str(xiaoniangao_sheet[i][13])
+                video_url = str(xiaoniangao_sheet[i][14])
+                crawler_rule = json.dumps({})
+                out_video_id = md5(video_title.encode('utf8')).hexdigest()
+
+                # print(f"video_id:{video_id}, type:{type(video_id)}")
+                # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
+                # print(f"platform:{platform}, type:{type(platform)}")
+                # print(f"strategy:{strategy}, type:{type(strategy)}")
+                # print(f"video_title:{video_title}, type:{type(video_title)}")
+                # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
+                # print(f"video_url:{video_url}, type:{type(video_url)}")
+                # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
+
+                select_sql = f""" select * from crawler_video where platform="{platform}" and video_url="{video_url}" """
+                Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
+                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env)
+                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
+
+                if repeat_video is not None and len(repeat_video) != 0:
+                    Common.logger(log_type, crawler).info(f"{video_title} 已存在数据库中\n")
+                else:
+                    # 视频信息保存数据库
+                    insert_sql = f""" insert into crawler_video(video_id,
+                                        out_user_id,
+                                        platform,
+                                        strategy,
+                                        out_video_id,
+                                        video_title,
+                                        cover_url,
+                                        video_url,
+                                        duration,
+                                        play_cnt,
+                                        crawler_rule,
+                                        width,
+                                        height)
+                                        values({video_id},
+                                        "{out_user_id}",
+                                        "{platform}",
+                                        "{strategy}",
+                                        "{out_video_id}",
+                                        "{video_title}",
+                                        "{cover_url}",
+                                        "{video_url}",
+                                        {duration},
+                                        {play_cnt},
+                                        '{crawler_rule}',
+                                        {width},
+                                        {height}) """
+                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
+                    Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
+
+
+
+if __name__ == "__main__":
+    # Insert.insert_config("insert", "benshanzhufu", "dev")
+    # print(Insert.get_config("insert", "ganggangdouchuan", "filter", "dev"))
+    # Insert.insert_video_from_feishu_to_mysql("insert-dev", "ganggangdouchuan", "dev")
+    # Insert.insert_video_from_feishu_to_mysql("insert-dev", "jixiangxingfu", "dev")
+    Insert.insert_video_from_feishu_to_mysql("insert-prod", "jixiangxingfu", "prod")
+    pass
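
Note that the play-count cells above are converted with `int(value.split("万")[0]) * 10000`, which assumes every cell is an integer count of 万 (e.g. "3万"); a cell like "1.2万" would raise `ValueError`, and a plain "3456" would be inflated 10000x. A more defensive conversion, as a sketch only (the helper name is hypothetical, not part of this commit):

```python
def parse_play_cnt(raw) -> int:
    """Convert sheet values like '3万', '1.2万' or '3456' to an integer play count."""
    raw = str(raw).strip()
    if "万" in raw:
        # '1.2万' -> 12000; float() tolerates decimal prefixes
        return int(float(raw.split("万")[0]) * 10000)
    return int(raw)

# parse_play_cnt("3万")   -> 30000
# parse_play_cnt("1.2万") -> 12000
# parse_play_cnt("3456")  -> 3456
```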

+ 403 - 0
jixiangxingfu/jixiangxingfu_recommend/jixiangxingfu_recommend.py

@@ -0,0 +1,403 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17
+import json
+import os
+import shutil
+import sys
+import time
+from hashlib import md5
+from appium import webdriver
+from appium.webdriver.common.touch_action import TouchAction
+from appium.webdriver.extensions.android.nativekey import AndroidKey
+from appium.webdriver.webdriver import WebDriver
+from selenium.common import NoSuchElementException
+from selenium.webdriver.common.by import By
+sys.path.append(os.getcwd())
+from common.feishu import Feishu
+from common.common import Common
+from common.publish import Publish
+from common.scheduling_db import MysqlHelper
+
+
+class JixiangxingfuRecommend:
+    platform = "吉祥幸福"
+    i = 0
+
+    @classmethod
+    def jixiangxingfu_config(cls, log_type, crawler, text, env):
+        select_sql = f"""select * from crawler_config where source="jixiangxingfu" """
+        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
+        title_list = []
+        filter_list = []
+        for content in contents:
+            config = content['config']
+            config_dict = eval(config)
+            for k, v in config_dict.items():
+                if k == "title":
+                    title_list_config = v.split(",")
+                    for title in title_list_config:
+                        title_list.append(title)
+                if k == "filter":
+                    filter_list_config = v.split(",")
+                    for filter_word in filter_list_config:
+                        filter_list.append(filter_word)
+        if text == "title":
+            return title_list
+        elif text == "filter":
+            return filter_list
+
+    @classmethod
+    def start_wechat(cls, log_type, crawler, env):
+        # try:
+        if env == "dev":
+            chromedriverExecutable = '/Users/wangkun/Downloads/chromedriver/chromedriver_v107/chromedriver'
+        else:
+            chromedriverExecutable = '/Users/piaoquan/Downloads/chromedriver'
+        Common.logger(log_type, crawler).info('启动微信')
+        caps = {
+            "platformName": "Android",  # 手机操作系统 Android / iOS
+            "deviceName": "a0a65126",  # 连接的设备名(模拟器或真机),安卓可以随便写
+            "platforVersion": "11",  # 手机对应的系统版本(Android 11)
+            "appPackage": "com.tencent.mm",  # 被测APP的包名,乐活圈 Android
+            "appActivity": ".ui.LauncherUI",  # 启动的Activity名
+            "autoGrantPermissions": "true",  # 让 appium 自动授权 base 权限,
+            # 如果 noReset 为 True,则该条不生效(该参数为 Android 独有),对应的值为 True 或 False
+            "unicodekeyboard": True,  # 使用自带输入法,输入中文时填True
+            "resetkeyboard": True,  # 执行完程序恢复原来输入法
+            "noReset": True,  # 不重置APP
+            "printPageSourceOnFailure": True,  # 找不到元素时,appium log 会完整记录当前页面的 pagesource
+            "newCommandTimeout": 6000,  # 初始等待时间
+            "automationName": "UiAutomator2",  # 使用引擎,默认为 Appium,
+            # 其中 Appium、UiAutomator2、Selendroid、Espresso 用于 Android,XCUITest 用于 iOS
+            "showChromedriverLog": True,
+            'enableWebviewDetailsCollection': True,
+            'setWebContentsDebuggingEnabled': True,
+            'recreateChromeDriverSessions': True,
+            'chromedriverExecutable': chromedriverExecutable,
+            "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
+            # "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
+            'browserName': ''
+        }
+        driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
+        driver.implicitly_wait(20)
+        # 向下滑动页面,展示出小程序选择面板
+        for i in range(120):
+            try:
+                # 发现微信消息 TAB,代表微信已启动成功
+                if driver.find_elements(By.ID, 'com.tencent.mm:id/f2s'):
+                    break
+                # 发现并关闭系统菜单栏
+                elif driver.find_element(By.ID, 'com.android.systemui:id/dismiss_view'):
+                    Common.logger(log_type, crawler).info('发现并关闭系统下拉菜单栏')
+                    driver.find_element(By.ID, 'com.android.systemui:id/dismiss_view').click()
+                else:
+                    pass
+            except NoSuchElementException:
+                time.sleep(1)
+        Common.logger(log_type, crawler).info('下滑,展示小程序选择面板')
+        size = driver.get_window_size()
+        driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2), int(size['width'] * 0.5),
+                     int(size['height'] * 0.8), 200)
+        # 打开小程序"祝福大家好才是真好"
+        time.sleep(5)
+        Common.logger(log_type, crawler).info('打开小程序"祝福大家好才是真好"')
+        driver.find_elements(By.XPATH, '//*[@text="祝福大家好才是真好"]')[-1].click()
+
+        # 获取视频信息
+        time.sleep(5)
+        cls.get_videoList(log_type, crawler, driver, env)
+
+        # 退出微信
+        cls.quit(log_type, crawler, driver)
+
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error('start_wechat异常:{},重启 ADB\n', e)
+
+    # 退出 APP
+    @classmethod
+    def quit(cls, log_type, crawler, driver: WebDriver):
+        driver.quit()
+        Common.logger(log_type, crawler).info('退出APP成功\n')
+
+    @classmethod
+    def check_to_applet(cls, log_type, crawler, driver: WebDriver):
+        while True:
+            webview = driver.contexts
+            driver.switch_to.context(webview[1])
+            windowHandles = driver.window_handles
+            for handle in windowHandles:
+                driver.switch_to.window(handle)
+                time.sleep(1)
+                try:
+                    video_list = driver.find_element(By.XPATH, '//wx-view[text()="视频"]')
+                    video_list.click()
+                    Common.logger(log_type, crawler).info('切换到小程序视频列表成功\n')
+                    return
+                except NoSuchElementException:
+                    time.sleep(1)
+            Common.logger(log_type, crawler).info("切换到小程序失败\n")
+            break
+
+    # 查找元素
+    @classmethod
+    def search_elements(cls, driver: WebDriver, xpath):
+        windowHandles = driver.window_handles
+        for handle in windowHandles:
+            driver.switch_to.window(handle)
+            time.sleep(1)
+            try:
+                elements = driver.find_elements(By.XPATH, xpath)
+                if elements:
+                    return elements
+            except NoSuchElementException:
+                pass
+    @classmethod
+    def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
+        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{out_video_id}"; """
+        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
+        return len(repeat_video)
+
+    @classmethod
+    def repeat_video_url(cls, log_type, crawler, video_url, env):
+        sql = f""" select * from crawler_video where platform="{cls.platform}" and video_url="{video_url}"; """
+        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
+        return len(repeat_video)
+
+    @classmethod
+    def find_ad(cls, log_type, crawler, driver: WebDriver):
+        windowHandles = driver.window_handles
+        # Common.logger(log_type, crawler).info('windowHandles:{}', windowHandles)
+        # 遍历所有的handles,找到当前页面所在的handle:如果pageSource有包含你想要的元素,就是所要找的handle
+        # 小程序的页面来回切换也需要:遍历所有的handles,切换到元素所在的handle
+        for handle in windowHandles:
+            driver.switch_to.window(handle)
+            time.sleep(1)
+            try:
+                Common.logger(log_type, crawler).info("寻找广告~~~~~~")
+                ad_element = driver.find_element(By.XPATH, '//div[@class="ad-text"]')
+                if ad_element:
+                    Common.logger(log_type, crawler).info("发现广告")
+                    for i in range(20):
+                        if driver.find_element(By.XPATH, '//div[@id="count_down_container"]/*[1]').text == "已完成浏览":
+                            Common.logger(log_type, crawler).info("广告播放完毕,点击返回")
+                            driver.press_keycode(AndroidKey.BACK)
+                            return
+                        else:
+                            Common.logger(log_type, crawler).info("广告未播放完毕,等待 1 秒")
+                            time.sleep(1)
+                else:
+                    Common.logger(log_type, crawler).info("未发现广告, 退出")
+                    return
+            except NoSuchElementException:
+                time.sleep(1)
+
+    @classmethod
+    def get_video_url(cls, log_type, crawler, driver: WebDriver, video_element):
+        video_element.click()
+        time.sleep(5)
+        windowHandles = driver.window_handles
+        for handle in windowHandles:
+            driver.switch_to.window(handle)
+            time.sleep(1)
+            try:
+                video_url_element = driver.find_element(By.XPATH, '//wx-video[@class="video-section"]')
+                video_url = video_url_element.get_attribute("src")
+                cls.find_ad(log_type, crawler, driver)
+                return video_url
+            except NoSuchElementException:
+                time.sleep(1)
+
+    @classmethod
+    def get_videoList(cls, log_type, crawler, driver, env):
+        # 鼠标左键点击, 1为x坐标, 2为y坐标
+        Common.logger(log_type, crawler).info('关闭广告')
+        size = driver.get_window_size()
+        TouchAction(driver).tap(x=int(size['width'] * 0.5), y=int(size['height'] * 0.1)).perform()
+        # 切换到小程序
+        cls.check_to_applet(log_type, crawler, driver)
+
+        time.sleep(5)
+        index = 0
+        while True:
+            try:
+                if cls.search_elements(driver, '//wx-view[@class="video-list-container"]') is None:
+                    Common.logger(log_type, crawler).info('窗口已销毁\n')
+                    return
+
+                Common.logger(log_type, crawler).info('获取视频列表\n')
+                video_elements = cls.search_elements(driver, '//wx-view[@class="video-item"]')
+                if video_elements is None:
+                    Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
+                    return
+
+                video_element_temp = video_elements[index:]
+                if len(video_element_temp) == 0:
+                    Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
+                    return
+
+                for i, video_element in enumerate(video_element_temp):
+                    if video_element is None:
+                        Common.logger(log_type, crawler).info('到底啦~\n')
+                        return
+                    cls.i += 1
+                    cls.search_elements(driver, '//wx-view[@class="video-item"]')
+
+                    Common.logger(log_type, crawler).info(f'拖动"视频"列表第{cls.i}个至屏幕中间')
+                    time.sleep(3)
+                    driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})", video_element)
+
+                    # video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="video-title"]')[cls.i - 1].text
+                    # cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="video-cover-img"]')[cls.i - 1].get_attribute('src')
+                    # play_cnt = video_element.find_elements(By.XPATH, '//wx-view[@class="video-play-num"]')[cls.i - 1].text
+
+                    video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="video-title"]')[index+i].text
+                    cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="video-cover-img"]')[index+i].get_attribute('src')
+                    play_cnt = video_element.find_elements(By.XPATH, '//wx-view[@class="video-play-num"]')[index+i].text
+
+                    if "万" in play_cnt:
+                        play_cnt = int(play_cnt.split("万")[0])*10000
+                    # play_btn = video_element.find_elements(By.XPATH, '//wx-image[@class="video-play-img"]')[cls.i - 1]
+                    out_video_id = md5(video_title.encode('utf8')).hexdigest()
+                    video_dict = {
+                        'video_title': video_title,
+                        'video_id': out_video_id,
+                        'play_cnt': play_cnt,
+                        'comment_cnt': 0,
+                        'like_cnt': 0,
+                        'share_cnt': 0,
+                        'publish_time_stamp': int(time.time()),
+                        'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
+                        'user_name': "jixiangxingfu",
+                        'user_id': "jixiangxingfu",
+                        'avatar_url': cover_url,
+                        'cover_url': cover_url,
+                        'session': f"jixiangxingfu-{int(time.time())}"
+                    }
+                    for k, v in video_dict.items():
+                        Common.logger(log_type, crawler).info(f"{k}:{v}")
+
+                    if video_title is None or cover_url is None:
+                        Common.logger(log_type, crawler).info("无效视频\n")
+                    elif any(str(word) if str(word) in video_title else False for word in
+                             cls.jixiangxingfu_config(log_type, crawler, "filter", env)) is True:
+                        Common.logger(log_type, crawler).info('已中过滤词\n')
+                    elif cls.repeat_out_video_id(log_type, crawler, out_video_id, env) != 0:
+                        Common.logger(log_type, crawler).info('视频已下载\n')
+                    else:
+                        video_url = cls.get_video_url(log_type, crawler, driver, video_element)
+
+                        if video_url is None:
+                            Common.logger(log_type, crawler).info("未获取到视频播放地址\n")
+                            driver.press_keycode(AndroidKey.BACK)
+                        elif cls.repeat_video_url(log_type, crawler, video_url, env) != 0:
+                            Common.logger(log_type, crawler).info('视频已下载\n')
+                            driver.press_keycode(AndroidKey.BACK)
+                        else:
+                            video_dict["video_url"] = video_url
+                            Common.logger(log_type, crawler).info(f"video_url:{video_url}\n")
+                            # driver.press_keycode(AndroidKey.BACK)
+                            cls.download_publish(log_type, crawler, video_dict, env, driver)
+                Common.logger(log_type, crawler).info('已抓取完一组视频,休眠10秒\n')
+                time.sleep(10)
+                index = index + len(video_element_temp)
+            except Exception as e:
+                Common.logger(log_type, crawler).info(f"get_videoList:{e}\n")
+                cls.i = 0
+
+    @classmethod
+    def download_publish(cls, log_type, crawler, video_dict, env, driver: WebDriver):
+        # 下载视频
+        Common.download_method(log_type=log_type, crawler=crawler, text='video', title=video_dict['video_title'], url=video_dict['video_url'])
+        ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
+        if ffmpeg_dict is None:
+            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
+            shutil.rmtree(f"./{crawler}/videos/{md_title}/")
+            Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+            return
+        video_dict["duration"] = ffmpeg_dict["duration"]
+        video_dict["video_width"] = ffmpeg_dict["width"]
+        video_dict["video_height"] = ffmpeg_dict["height"]
+        # 下载封面
+        Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'],
+                               url=video_dict['cover_url'])
+        # 保存视频信息至txt
+        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
+
+        # 上传视频
+        Common.logger(log_type, crawler).info("开始上传视频...")
+        our_video_id = Publish.upload_and_publish(log_type=log_type,
+                                                  crawler=crawler,
+                                                  strategy="推荐榜爬虫策略",
+                                                  our_uid="recommend",
+                                                  env=env,
+                                                  oss_endpoint="out")
+        if env == 'dev':
+            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        else:
+            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        Common.logger(log_type, crawler).info("视频上传完成")
+
+        if our_video_id is None:
+            # 删除视频文件夹
+            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
+            return
+
+        # 视频信息保存至飞书
+        Feishu.insert_columns(log_type, crawler, "d9e9b1", "ROWS", 1, 2)
+        # 视频ID工作表,首行写入数据
+        upload_time = int(time.time())
+        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
+                   "推荐榜爬虫策略",
+                   video_dict["video_title"],
+                   video_dict["video_id"],
+                   video_dict["play_cnt"],
+                   video_dict["duration"],
+                   f'{video_dict["video_width"]}*{video_dict["video_height"]}',
+                   our_video_link,
+                   video_dict["cover_url"],
+                   video_dict["video_url"]]]
+        time.sleep(1)
+        Feishu.update_values(log_type, crawler, "d9e9b1", "F2:V2", values)
+        Common.logger(log_type, crawler).info(f"视频已保存至飞书文档\n")
+
+        rule_dict = {}
+        # 视频信息保存数据库
+        insert_sql = f""" insert into crawler_video(video_id,
+                                                        out_user_id,
+                                                        platform,
+                                                        strategy,
+                                                        out_video_id,
+                                                        video_title,
+                                                        cover_url,
+                                                        video_url,
+                                                        duration,
+                                                        publish_time,
+                                                        play_cnt,
+                                                        crawler_rule,
+                                                        width,
+                                                        height)
+                                                        values({our_video_id},
+                                                        "{video_dict['user_id']}",
+                                                        "{cls.platform}",
+                                                        "推荐榜爬虫策略",
+                                                        "{video_dict['video_id']}",
+                                                        "{video_dict['video_title']}",
+                                                        "{video_dict['cover_url']}",
+                                                        "{video_dict['video_url']}",
+                                                        {int(video_dict['duration'])},
+                                                        "{video_dict['publish_time_str']}",
+                                                        {int(video_dict['play_cnt'])},
+                                                        '{json.dumps(rule_dict)}',
+                                                        {int(video_dict['video_width'])},
+                                                        {int(video_dict['video_height'])}) """
+        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
+        Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
+        driver.press_keycode(AndroidKey.BACK)
+
+
+if __name__ == "__main__":
+    # JixiangxingfuRecommend.start_wechat('recommend', 'jixiangxingfu', 'dev')
+    print(JixiangxingfuRecommend.jixiangxingfu_config("recommend", "jixiangxingfu", "filter", "dev"))
+    pass
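
The filter-word test in `get_videoList`, `any(str(word) if str(word) in video_title else False for word in ...)`, is hard to read. An equivalent form for string filter words (it likewise treats empty words as non-matches), shown here only as a sketch with a hypothetical helper name:

```python
def title_hits_filter(video_title: str, filter_words) -> bool:
    """True if any non-empty filter word occurs in the title."""
    return any(word and word in video_title for word in filter_words)

# Illustrative values only:
# title_hits_filter("吉祥幸福送给你", ["广告", "幸福"]) -> True
# title_hits_filter("吉祥幸福送给你", ["", "广告"])     -> False
```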

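The window-handle probing used by `check_to_applet`, `search_elements` and `find_ad` follows one idiom: switch from the native context into the WebView context, then try each window handle until one contains the target element. A condensed sketch of that idiom (assumes an already-connected Appium driver; not a drop-in replacement for the methods above):

```python
from selenium.common import NoSuchElementException
from selenium.webdriver.common.by import By


def find_in_any_handle(driver, xpath):
    """Switch into the WebView context, then probe each window handle for xpath."""
    driver.switch_to.context(driver.contexts[1])  # contexts[0] is NATIVE_APP
    for handle in driver.window_handles:
        driver.switch_to.window(handle)
        try:
            elements = driver.find_elements(By.XPATH, xpath)
            if elements:
                return elements
        except NoSuchElementException:
            continue
    return None
```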
+ 3 - 0
jixiangxingfu/logs/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/18

BIN
jixiangxingfu/videos/.DS_Store


+ 63 - 45
main/process_offline.sh

@@ -1,8 +1,7 @@
 #! /bin/bash
 # **********线下爬虫********** #
-
 env=$1          # 爬虫运行环境,正式环境: prod / 测试环境: dev
-echo ${env}
+#echo ${env}
 if [ ${env} = "dev" ];then
   piaoquan_crawler_dir=/Users/wangkun/Desktop/crawler/piaoquan_crawler/
   profile_path=/etc/profile
@@ -23,71 +22,90 @@ echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成!" >> ${log_path}
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启Appium..." >> ${log_path}
 ps -ef | grep "/Applications/Appium.app/Contents/Resources/app/node_modules/appium/build/lib/main.js" | grep -v "grep"
 if [ "$?" -eq 1 ];then
-  echo "$(date "+%Y-%m-%d_%H:%M:%S") Appium异常停止,正在重启!" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") Appium异常停止,正在重启!" >> ${log_path}
   nohup ${node_path} /Applications/Appium.app/Contents/Resources/app/node_modules/appium/build/lib/main.js >>./nohup.log 2>&1 &
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启Appium完毕!" >> ${log_path}
 else
   echo "$(date "+%Y-%m-%d %H:%M:%S") Appium 进程状态正常" >> ${log_path}
 fi
-echo "$(date "+%Y-%m-%d %H:%M:%S") 重启Appium完毕!" >> ${log_path}
 
-## 吉祥幸福
-#if [[ "$time" > "00:00:0" ]] && [[ "$time" < "08:59:59" ]]; then
-#  echo "开始启动 吉祥幸福 爬虫脚本任务" >> ${log_path}
+# 吉祥幸福
+if [[ "$time" > "00:00:0" ]] && [[ "$time" < "22:59:59" ]]; then
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 吉祥幸福 爬虫脚本任务" >> ${log_path}
+  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zhiqingtiantiankan | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps -ef | grep "run_jixiangxingfu_recommend.py" | grep -v "grep"
+  if [ "$?" -eq 1 ];then
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 吉祥幸福爬虫, 异常停止, 正在重启!" >> ${log_path}
+    cd ${piaoquan_crawler_dir}
+    nohup python3 -u jixiangxingfu/jixiangxingfu_main/run_jixiangxingfu_recommend.py --log_type="recommend" --crawler="jixiangxingfu" --env=${env} >>jixiangxingfu/logs/nohup-recommend.log 2>&1 &
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+  else
+    echo "$(date "+%Y-%m-%d %H:%M:%S") 吉祥幸福 进程状态正常" >> ${log_path}
+  fi
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 吉祥幸福 爬虫脚本任务结束" >> ${log_path}
+fi
+
+## 知青天天看
+#if [[ "$time" > "09:00:0" ]] && [[ "$time" < "22:59:59" ]]; then
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 知青天天看 爬虫脚本任务" >> ${log_path}
 #  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps aux | grep run_zhiqingtiantiankan | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
-#  ps -ef | grep "run_jixiangxingfu_recommend.py" | grep -v "grep"
+#  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps -ef | grep "run_zhiqingtiantiankan_recommend.py" | grep -v "grep"
 #  if [ "$?" -eq 1 ];then
-#    echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#    if [ ${env} = "dev" ];then
-#      cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./jixiangxingfu/jixiangxingfu_main/run_jixiangxingfu_recommend.py --log_type="recommend" --crawler="jixiangxingfu" --env="dev" jixiangxingfu/logs/nohup-recommend.log
-#    else
-#      cd ${piaoquan_crawler_dir} && /bin/sh main/scheduling_main.sh ./jixiangxingfu/jixiangxingfu_main/run_jixiangxingfu_recommend.py --log_type="recommend" --crawler="jixiangxingfu" --env="prod" jixiangxingfu/logs/nohup-recommend.log
-#    fi
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 知青天天看小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
+#    cd ${piaoquan_crawler_dir}
+#    nohup python3 -u zhiqingtiantiankan/zhiqingtiantiankan_main/run_zhiqingtiantiankan_recommend.py --log_type="recommend" --crawler="zhiqingtiantiankan" --env=${env} >>zhiqingtiantiankan/logs/nohup-recommend.log 2>&1 &
 #    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 #  else
-#    echo "$(date "+%Y-%m-%d %H:%M:%S") 吉祥幸福 进程状态正常" >> ${log_path}
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 知青天天看小程序爬虫, 进程状态正常" >> ${log_path}
 #  fi
 #else
-#  echo "吉祥幸福 爬虫脚本任务结束" >> ${log_path}
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 知青天天看 爬虫脚本任务结束" >> ${log_path}
 #fi
 
-## 知青天天看
-#if [[ "$time" > "09:00:0" ]] && [[ "$time" < "12:59:59" ]]; then
-#  echo "开始启动 知青天天看 爬虫脚本任务" >> ${log_path}
-#
+## 刚刚都传
+#if [[ "$time" > "13:00:0" ]] && [[ "$time" < "16:59:59" ]]; then
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 刚刚都传 爬虫脚本任务" >> ${log_path}
+#  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_zhiqingtiantiankan | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps -ef | grep "run_ganggangdouchuan_recommend.py" | grep -v "grep"
+#  if [ "$?" -eq 1 ];then
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 刚刚都传小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
+#    cd ${piaoquan_crawler_dir}
+#    nohup python3 -u ganggangdouchuan/ganggangdouchuan_main/run_ganggangdouchuan_recommend.py --log_type="recommend" --crawler="ganggangdouchuan" --env=${env} >>ganggangdouchuan/logs/nohup-recommend.log 2>&1 &
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+#  else
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 刚刚都传小程序爬虫, 进程状态正常" >> ${log_path}
+#  fi
 #else
-#  echo "知青天天看 爬虫脚本任务结束" >> ${log_path}
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 刚刚都传小程序爬虫, 任务结束" >> ${log_path}
 #fi
 
-# 刚刚都传
-if [[ "$time" > "10:00:0" ]] && [[ "$time" < "22:59:59" ]]; then
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 刚刚都传 爬虫脚本任务" >> ${log_path}
-  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
-  ps aux | grep run_zhiqingtiantiankan | grep -v grep | awk '{print $2}' | xargs kill -9
-  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
-  ps -ef | grep "run_ganggangdouchuan_recommend.py" | grep -v "grep"
-  if [ "$?" -eq 1 ];then
-    echo "$(date "+%Y-%m-%d_%H:%M:%S") 刚刚都传小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
-    cd ${piaoquan_crawler_dir}
-    nohup python3 -u ganggangdouchuan/ganggangdouchuan_main/run_ganggangdouchuan_recommend.py --log_type="recommend" --crawler="ganggangdouchuan" --env=${env} >>ganggangdouchuan/logs/nohup-recommend.log 2>&1 &
-    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-  else
-    echo "$(date "+%Y-%m-%d %H:%M:%S") 刚刚都传小程序爬虫, 进程状态正常" >> ${log_path}
-  fi
-else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 刚刚都传小程序爬虫, 任务结束" >> ${log_path}
-fi
-
 ## 众妙音信
-#if [[ "$time" > "17:00:0" ]] && [[ "$time" < "23:59:59" ]]; then
-#  echo "开始启动 众妙音信 爬虫脚本任务" >> ${log_path}
+#if [[ "$time" > "10:00:0" ]] && [[ "$time" < "23:59:59" ]]; then
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 众妙音信 爬虫脚本任务" >> ${log_path}
+#  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_zhiqingtiantiankan | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps -ef | grep "run_zhongmiaoyinxin_recommend.py" | grep -v "grep"
+#  if [ "$?" -eq 1 ];then
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信小程序爬虫, 异常停止, 正在重启!" >> ${log_path}
+#    cd ${piaoquan_crawler_dir}
+#    nohup python3 -u zhongmiaoyinxin/zhongmiaoyinxin_main/run_zhongmiaoyinxin_recommend.py --log_type="recommend" --crawler="zhongmiaoyinxin" --env=${env} >>zhongmiaoyinxin/logs/nohup-recommend.log 2>&1 &
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+#  else
+#    echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信小程序爬虫, 进程状态正常" >> ${log_path}
+#  fi
 #
 #else
-#  echo "众妙音信 爬虫脚本任务结束" >> ${log_path}
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 众妙音信 爬虫脚本任务结束" >> ${log_path}
 #fi
 
-
 # 删除日志
 echo "$(date "+%Y-%m-%d %H:%M:%S") 开始清理 10 天前的日志文件" >> ${log_path}
 find ${piaoquan_crawler_dir}main/main_logs/ -mtime +10 -name "*.log" -exec rm -rf {} \;
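
The time-window guards in this script compare `$time` as plain strings; that only works because zero-padded HH:MM:SS strings sort in chronological order. A quick sanity check of that assumption, as a sketch and not part of the commit:

```python
import time

# Zero-padded HH:MM:SS strings compare lexicographically in time order,
# which is what the [[ "$time" > "00:00:0" ]] guards rely on.
assert "00:00:00" < "09:30:15" < "22:59:59"

now = time.strftime("%H:%M:%S")
in_window = "00:00:0" < now < "22:59:59"  # same window as the 吉祥幸福 block
print(now, in_window)
```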

+ 335 - 335
xiaoniangao/xiaoniangao_follow/xiaoniangao_follow.py

@@ -76,234 +76,234 @@ class XiaoniangaoFollow:
     # 获取个人主页视频
     @classmethod
     def get_videoList(cls, log_type, crawler, strategy, p_mid, oss_endpoint, env):
-        try:
-            while True:
-                url = "https://api.xiaoniangao.cn/profile/list_album"
-                headers = {
-                    # "X-Mid": str(cls.follow_x_mid),
-                    "X-Mid": '1fb47aa7a860d9',
-                    # "X-Token-Id": str(cls.follow_x_token_id),
-                    "X-Token-Id": '9f2cb91f9952c107ecb73642083e1dec-1145266232',
-                    "content-type": "application/json",
-                    # "uuid": str(cls.follow_uid),
-                    "uuid": 'f40c2e7c-3cfb-4804-b513-608c0280268c',
-                    "Accept-Encoding": "gzip,compress,br,deflate",
-                    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
-                                  " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
-                                  "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
-                    # "Referer": str(cls.follow_referer)
-                    "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/654/page-frame.html'
+        # try:
+        while True:
+            url = "https://api.xiaoniangao.cn/profile/list_album"
+            headers = {
+                # "X-Mid": str(cls.follow_x_mid),
+                "X-Mid": '1fb47aa7a860d9',
+                # "X-Token-Id": str(cls.follow_x_token_id),
+                "X-Token-Id": '9f2cb91f9952c107ecb73642083e1dec-1145266232',
+                "content-type": "application/json",
+                # "uuid": str(cls.follow_uid),
+                "uuid": 'f40c2e7c-3cfb-4804-b513-608c0280268c',
+                "Accept-Encoding": "gzip,compress,br,deflate",
+                "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
+                              " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
+                              "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
+                # "Referer": str(cls.follow_referer)
+                "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/654/page-frame.html'
+            }
+            json_text = {
+                "visited_mid": str(p_mid),
+                "start_t": cls.next_t,
+                "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!690x385r/crop/690x385/interlace/1/format/jpg",
+                "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!120x120r/crop/120x120/interlace/1/format/jpg",
+                "limit": 20,
+                # "token": str(cls.follow_token),
+                "token": '54e4c603f7bf3dc009c86b49ed91be36',
+                # "uid": str(cls.follow_uid),
+                "uid": 'f40c2e7c-3cfb-4804-b513-608c0280268c',
+                "proj": "ma",
+                "wx_ver": "8.0.23",
+                "code_ver": "3.68.0",
+                "log_common_params": {
+                    "e": [{
+                        "data": {
+                            "page": "profilePage",
+                            "topic": "public"
+                        }
+                    }],
+                    "ext": {
+                        "brand": "iPhone",
+                        "device": "iPhone 11",
+                        "os": "iOS 14.7.1",
+                        "weixinver": "8.0.23",
+                        "srcver": "2.24.7",
+                        "net": "wifi",
+                        "scene": "1089"
+                    },
+                    "pj": "1",
+                    "pf": "2",
+                    "session_id": "7468cf52-00ea-432e-8505-6ea3ad7ec164"
                 }
-                json_text = {
-                    "visited_mid": str(p_mid),
-                    "start_t": cls.next_t,
-                    "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!690x385r/crop/690x385/interlace/1/format/jpg",
-                    "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!120x120r/crop/120x120/interlace/1/format/jpg",
-                    "limit": 20,
-                    # "token": str(cls.follow_token),
-                    "token": '54e4c603f7bf3dc009c86b49ed91be36',
-                    # "uid": str(cls.follow_uid),
-                    "uid": 'f40c2e7c-3cfb-4804-b513-608c0280268c',
-                    "proj": "ma",
-                    "wx_ver": "8.0.23",
-                    "code_ver": "3.68.0",
-                    "log_common_params": {
-                        "e": [{
-                            "data": {
-                                "page": "profilePage",
-                                "topic": "public"
-                            }
-                        }],
-                        "ext": {
-                            "brand": "iPhone",
-                            "device": "iPhone 11",
-                            "os": "iOS 14.7.1",
-                            "weixinver": "8.0.23",
-                            "srcver": "2.24.7",
-                            "net": "wifi",
-                            "scene": "1089"
-                        },
-                        "pj": "1",
-                        "pf": "2",
-                        "session_id": "7468cf52-00ea-432e-8505-6ea3ad7ec164"
-                    }
-                }
-                urllib3.disable_warnings()
-                r = requests.post(url=url, headers=headers, json=json_text, proxies=proxies, verify=False)
-                if 'data' not in r.text or r.status_code != 200:
-                    Common.logger(log_type, crawler).info(f"get_videoList:{r.text}\n")
-                    cls.next_t = None
-                    return
-                elif 'list' not in r.json()['data']:
-                    Common.logger(log_type, crawler).info(f"get_videoList:{r.json()}\n")
-                    cls.next_t = None
-                    return
-                elif len(r.json()['data']['list']) == 0:
-                    Common.logger(log_type, crawler).info(f"没有更多数据啦~\n")
-                    cls.next_t = None
-                    return
-                else:
-                    cls.next_t = r.json()["data"]["next_t"]
-                    feeds = r.json()["data"]["list"]
-                    for i in range(len(feeds)):
-                        # 标题,表情随机加在片头、片尾,或替代句子中间的标点符号
-                        char_sheet = [['📍', '...'], ['⭕️', '~~'], ['🔥', None], ['📣', None], ['🎈', None], ['⚡', None],
-                                      ['🔔', None], ['🚩', None], ['💢', None], ['💎', None], ['👉', None], ['💓', None],
-                                      ['❗️', None], ['🔴', None], ['🔺', None], ['♦️', None], ['♥️', None], ['👉', None],
-                                      ['👈', None], ['🏆', None], ['❤️\u200d🔥', None]]
-                        expression_list = []
-                        char_list = []
-                        for q in range(len(char_sheet)):
-                            if char_sheet[q][0] is not None:
-                                expression_list.append(char_sheet[q][0])
-                            if char_sheet[q][1] is not None:
-                                char_list.append(char_sheet[q][1])
-                        befor_video_title = feeds[i]["title"].strip().replace("\n", "") \
-                            .replace("/", "").replace("\r", "").replace("#", "") \
-                            .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
-                            .replace(":", "").replace("*", "").replace("?", "") \
-                            .replace("?", "").replace('"', "").replace("<", "") \
-                            .replace(">", "").replace("|", "").replace(" ", "").replace('"', '').replace("'", '')
-                        expression = random.choice(expression_list)
-                        expression_title_list = [expression + befor_video_title, befor_video_title + expression]
-                        # 标题,表情随机加在片头
-                        title_list1 = random.choice(expression_title_list)
-                        # 标题,表情随机加在片尾
-                        title_list2 = befor_video_title + random.choice(char_list)
-                        # # 替代句子中间的标点符号
-                        # title_list3 = befor_video_title.replace(
-                        #     ",", random.choice(expression_list)).replace(",", random.choice(expression_list))
-                        title_list4 = [title_list1, title_list2]
-                        video_title = random.choice(title_list4)
-
-                        # 用户名
-                        user_name = feeds[i]["album_user"]["nick"].strip().replace("\n", "") \
-                            .replace("/", "").replace("快手", "").replace(" ", "") \
-                            .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-
-                        # 视频 ID
-                        if "vid" in feeds[i]:
-                            video_id = feeds[i]["vid"]
-                        else:
-                            video_id = 0
-
-                        # 播放量
-                        if "play_pv" in feeds[i]:
-                            video_play_cnt = feeds[i]["play_pv"]
-                        else:
-                            video_play_cnt = 0
-
-                        # 点赞
-                        if "total" in feeds[i]["favor"]:
-                            video_like_cnt = feeds[i]["favor"]["total"]
-                        else:
-                            video_like_cnt = 0
-
-                        # 评论数
-                        if "comment_count" in feeds[i]:
-                            video_comment_cnt = feeds[i]["comment_count"]
-                        else:
-                            video_comment_cnt = 0
-
-                        # 分享
-                        if "share" in feeds[i]:
-                            video_share_cnt = feeds[i]["share"]
-                        else:
-                            video_share_cnt = 0
-
-                        # 时长
-                        if "du" in feeds[i]:
-                            video_duration = int(feeds[i]["du"] / 1000)
-                        else:
-                            video_duration = 0
-
-                        # 发布时间
-                        if "t" in feeds[i]:
-                            publish_time_stamp = int(feeds[i]["t"] / 1000)
-                        else:
-                            publish_time_stamp = 0
-                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-
-                        # 宽和高
-                        if "w" in feeds[i] or "h" in feeds[i]:
-                            video_width = feeds[i]["w"]
-                            video_height = feeds[i]["h"]
-                        else:
-                            video_width = 0
-                            video_height = 0
-
-                        # 头像
-                        if "hurl" in feeds[i]["album_user"]:
-                            head_url = feeds[i]["album_user"]["hurl"]
-                        else:
-                            head_url = 0
-
-                        # 用户 ID
-                        if "id" in feeds[i]:
-                            profile_id = feeds[i]["id"]
-                        else:
-                            profile_id = 0
-
-                        # 用户 mid
-                        if "mid" in feeds[i]:
-                            profile_mid = feeds[i]["mid"]
-                        else:
-                            profile_mid = 0
-
-                        # 封面
-                        if "url" in feeds[i]:
-                            cover_url = feeds[i]["url"]
-                        else:
-                            cover_url = 0
-
-                        # 视频播放地址
-                        if "v_url" in feeds[i]:
-                            video_url = feeds[i]["v_url"]
-                        else:
-                            video_url = 0
-
-                        # 过滤无效视频
-                        if video_id == 0 \
-                                or video_title == 0 \
-                                or publish_time_stamp == 0 \
-                                or video_duration == 0 \
-                                or video_url == 0:
-                            Common.logger(log_type, crawler).info("无效视频\n")
-                        elif int(time.time()) - publish_time_stamp > 3600 * 24 * 3:
-                            Common.logger(log_type, crawler).info(f"发布时间超过3天:{publish_time_str}\n")
-                            cls.next_t = None
-                            return
-                        else:
-                            video_dict = {
-                                "video_id": video_id,
-                                "video_title": video_title,
-                                "duration": video_duration,
-                                "play_cnt": video_play_cnt,
-                                "like_cnt": video_like_cnt,
-                                "comment_cnt": video_comment_cnt,
-                                "share_cnt": video_share_cnt,
-                                "user_name": user_name,
-                                "publish_time_stamp": publish_time_stamp,
-                                "publish_time_str": publish_time_str,
-                                "video_width": video_width,
-                                "video_height": video_height,
-                                "avatar_url": head_url,
-                                "profile_id": profile_id,
-                                "profile_mid": profile_mid,
-                                "cover_url": cover_url,
-                                "video_url": video_url,
-                                "session": f"xiaoniangao-follow-{int(time.time())}"
-                            }
-                            for k, v in video_dict.items():
-                                Common.logger(log_type, crawler).info(f"{k}:{v}")
-                            cls.download_publish(log_type=log_type,
-                                                 crawler=crawler,
-                                                 strategy=strategy,
-                                                 video_dict=video_dict,
-                                                 oss_endpoint=oss_endpoint,
-                                                 env=env)
-        except Exception as error:
-            Common.logger(log_type, crawler).error(f"获取个人主页视频异常:{error}\n")
+            }
+            urllib3.disable_warnings()
+            r = requests.post(url=url, headers=headers, json=json_text, proxies=proxies, verify=False)
+            if 'data' not in r.text or r.status_code != 200:
+                Common.logger(log_type, crawler).info(f"get_videoList:{r.text}\n")
+                cls.next_t = None
+                return
+            elif 'list' not in r.json()['data']:
+                Common.logger(log_type, crawler).info(f"get_videoList:{r.json()}\n")
+                cls.next_t = None
+                return
+            elif len(r.json()['data']['list']) == 0:
+                Common.logger(log_type, crawler).info(f"没有更多数据啦~\n")
+                cls.next_t = None
+                return
+            else:
+                cls.next_t = r.json()["data"]["next_t"]
+                feeds = r.json()["data"]["list"]
+                for i in range(len(feeds)):
+                    # 标题,表情随机加在片头、片尾,或替代句子中间的标点符号
+                    char_sheet = [['📍', '...'], ['⭕️', '~~'], ['🔥', None], ['📣', None], ['🎈', None], ['⚡', None],
+                                  ['🔔', None], ['🚩', None], ['💢', None], ['💎', None], ['👉', None], ['💓', None],
+                                  ['❗️', None], ['🔴', None], ['🔺', None], ['♦️', None], ['♥️', None], ['👉', None],
+                                  ['👈', None], ['🏆', None], ['❤️\u200d🔥', None]]
+                    expression_list = []
+                    char_list = []
+                    for q in range(len(char_sheet)):
+                        if char_sheet[q][0] is not None:
+                            expression_list.append(char_sheet[q][0])
+                        if char_sheet[q][1] is not None:
+                            char_list.append(char_sheet[q][1])
+                    befor_video_title = feeds[i]["title"].strip().replace("\n", "") \
+                        .replace("/", "").replace("\r", "").replace("#", "") \
+                        .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
+                        .replace(":", "").replace("*", "").replace("?", "") \
+                        .replace("?", "").replace('"', "").replace("<", "") \
+                        .replace(">", "").replace("|", "").replace(" ", "").replace('"', '').replace("'", '')
+                    expression = random.choice(expression_list)
+                    expression_title_list = [expression + befor_video_title, befor_video_title + expression]
+                    # 标题,表情随机加在片头
+                    title_list1 = random.choice(expression_title_list)
+                    # 标题,表情随机加在片尾
+                    title_list2 = befor_video_title + random.choice(char_list)
+                    # # 替代句子中间的标点符号
+                    # title_list3 = befor_video_title.replace(
+                    #     ",", random.choice(expression_list)).replace(",", random.choice(expression_list))
+                    title_list4 = [title_list1, title_list2]
+                    video_title = random.choice(title_list4)
+
+                    # 用户名
+                    user_name = feeds[i]["album_user"]["nick"].strip().replace("\n", "") \
+                        .replace("/", "").replace("快手", "").replace(" ", "") \
+                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
+
+                    # 视频 ID
+                    if "vid" in feeds[i]:
+                        video_id = feeds[i]["vid"]
+                    else:
+                        video_id = 0
+
+                    # 播放量
+                    if "play_pv" in feeds[i]:
+                        video_play_cnt = feeds[i]["play_pv"]
+                    else:
+                        video_play_cnt = 0
+
+                    # 点赞
+                    if "total" in feeds[i]["favor"]:
+                        video_like_cnt = feeds[i]["favor"]["total"]
+                    else:
+                        video_like_cnt = 0
+
+                    # 评论数
+                    if "comment_count" in feeds[i]:
+                        video_comment_cnt = feeds[i]["comment_count"]
+                    else:
+                        video_comment_cnt = 0
+
+                    # 分享
+                    if "share" in feeds[i]:
+                        video_share_cnt = feeds[i]["share"]
+                    else:
+                        video_share_cnt = 0
+
+                    # 时长
+                    if "du" in feeds[i]:
+                        video_duration = int(feeds[i]["du"] / 1000)
+                    else:
+                        video_duration = 0
+
+                    # 发布时间
+                    if "t" in feeds[i]:
+                        publish_time_stamp = int(feeds[i]["t"] / 1000)
+                    else:
+                        publish_time_stamp = 0
+                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
+
+                    # 宽和高
+                    if "w" in feeds[i] or "h" in feeds[i]:
+                        video_width = feeds[i]["w"]
+                        video_height = feeds[i]["h"]
+                    else:
+                        video_width = 0
+                        video_height = 0
+
+                    # 头像
+                    if "hurl" in feeds[i]["album_user"]:
+                        head_url = feeds[i]["album_user"]["hurl"]
+                    else:
+                        head_url = 0
+
+                    # 用户 ID
+                    if "id" in feeds[i]:
+                        profile_id = feeds[i]["id"]
+                    else:
+                        profile_id = 0
+
+                    # 用户 mid
+                    if "mid" in feeds[i]:
+                        profile_mid = feeds[i]["mid"]
+                    else:
+                        profile_mid = 0
+
+                    # 封面
+                    if "url" in feeds[i]:
+                        cover_url = feeds[i]["url"]
+                    else:
+                        cover_url = 0
+
+                    # 视频播放地址
+                    if "v_url" in feeds[i]:
+                        video_url = feeds[i]["v_url"]
+                    else:
+                        video_url = 0
+
+                    # 过滤无效视频
+                    if video_id == 0 \
+                            or video_title == 0 \
+                            or publish_time_stamp == 0 \
+                            or video_duration == 0 \
+                            or video_url == 0:
+                        Common.logger(log_type, crawler).info("无效视频\n")
+                    elif int(time.time()) - publish_time_stamp > 3600 * 24 * 3:
+                        Common.logger(log_type, crawler).info(f"发布时间超过3天:{publish_time_str}\n")
+                        cls.next_t = None
+                        return
+                    else:
+                        video_dict = {
+                            "video_id": video_id,
+                            "video_title": video_title,
+                            "duration": video_duration,
+                            "play_cnt": video_play_cnt,
+                            "like_cnt": video_like_cnt,
+                            "comment_cnt": video_comment_cnt,
+                            "share_cnt": video_share_cnt,
+                            "user_name": user_name,
+                            "publish_time_stamp": publish_time_stamp,
+                            "publish_time_str": publish_time_str,
+                            "video_width": video_width,
+                            "video_height": video_height,
+                            "avatar_url": head_url,
+                            "profile_id": profile_id,
+                            "profile_mid": profile_mid,
+                            "cover_url": cover_url,
+                            "video_url": video_url,
+                            "session": f"xiaoniangao-follow-{int(time.time())}"
+                        }
+                        for k, v in video_dict.items():
+                            Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        cls.download_publish(log_type=log_type,
+                                             crawler=crawler,
+                                             strategy=strategy,
+                                             video_dict=video_dict,
+                                             oss_endpoint=oss_endpoint,
+                                             env=env)
+        # except Exception as error:
+        #     Common.logger(log_type, crawler).error(f"获取个人主页视频异常:{error}\n")
 
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, env):
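
A minimal sketch of the title-handling step in the hunk above: the raw feed title is stripped of unsafe characters, then an emoji is randomly placed at the head or tail, or a plain suffix is appended. `decorate_title` is a hypothetical helper name, and the emoji/suffix lists are trimmed stand-ins for `char_sheet`:

```python
import random

# Trimmed stand-ins for char_sheet: emojis may go at the head or tail of the
# title, plain suffixes only at the tail.
EXPRESSIONS = ['📍', '⭕️', '🔥', '📣', '🎈']
SUFFIXES = ['...', '~~']


def decorate_title(raw_title: str) -> str:
    title = raw_title.strip().replace("\n", "").replace("\r", "")
    candidates = [
        random.choice(EXPRESSIONS) + title,   # emoji at the head
        title + random.choice(EXPRESSIONS),   # emoji at the tail
        title + random.choice(SUFFIXES),      # plain suffix at the tail
    ]
    return random.choice(candidates)


print(decorate_title("晚霞真美"))
```
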
@@ -314,117 +314,117 @@ class XiaoniangaoFollow:
     # 下载/上传
     @classmethod
     def download_publish(cls, log_type, crawler, strategy, video_dict, oss_endpoint, env):
-        try:
-            if cls.download_rule(video_dict) is False:
-                Common.logger(log_type, crawler).info("不满足基础门槛\n")
-            elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
-                Common.logger(log_type, crawler).info('视频已下载\n')
-            elif any(str(word) if str(word) in video_dict['video_title'] else False for word in
-                     filter_word(log_type, crawler, "小年糕", env)) is True:
-                Common.logger(log_type, crawler).info("视频已中过滤词\n")
+        # try:
+        if cls.download_rule(video_dict) is False:
+            Common.logger(log_type, crawler).info("不满足基础门槛\n")
+        elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
+            Common.logger(log_type, crawler).info('视频已下载\n')
+        elif any(str(word) if str(word) in video_dict['video_title'] else False for word in
+                 filter_word(log_type, crawler, "小年糕", env)) is True:
+            Common.logger(log_type, crawler).info("视频已中过滤词\n")
+        else:
+            # 下载封面
+            Common.download_method(log_type=log_type, crawler=crawler, text="cover",
+                                   title=video_dict["video_title"], url=video_dict["cover_url"])
+            # 下载视频
+            Common.download_method(log_type=log_type, crawler=crawler, text="video",
+                                   title=video_dict["video_title"], url=video_dict["video_url"])
+            # 保存视频信息至 "./videos/{download_video_title}/info.txt"
+            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
+
+            # 上传视频
+            Common.logger(log_type, crawler).info("开始上传视频...")
+            our_video_id = Publish.upload_and_publish(log_type=log_type,
+                                                      crawler=crawler,
+                                                      strategy=strategy,
+                                                      our_uid="follow",
+                                                      env=env,
+                                                      oss_endpoint=oss_endpoint)
+            if env == "dev":
+                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
             else:
-                # 下载封面
-                Common.download_method(log_type=log_type, crawler=crawler, text="cover",
-                                       title=video_dict["video_title"], url=video_dict["cover_url"])
-                # 下载视频
-                Common.download_method(log_type=log_type, crawler=crawler, text="video",
-                                       title=video_dict["video_title"], url=video_dict["video_url"])
-                # 保存视频信息至 "./videos/{download_video_title}/info.txt"
-                Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-                # 上传视频
-                Common.logger(log_type, crawler).info("开始上传视频...")
-                our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                          crawler=crawler,
-                                                          strategy=strategy,
-                                                          our_uid="follow",
-                                                          env=env,
-                                                          oss_endpoint=oss_endpoint)
-                if env == "dev":
-                    our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-                else:
-                    our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-                Common.logger(log_type, crawler).info("视频上传完成")
-
-                if our_video_id is None:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-                    return
-
-                # 视频信息保存数据库
-                rule_dict = {
-                    "duration": {"min": 40, "max": 100000000},
-                    "play_cnt": {"min": 500}
-                }
-
-                insert_sql = f""" insert into crawler_video(video_id,
-                                                out_user_id,
-                                                platform,
-                                                strategy,
-                                                out_video_id,
-                                                video_title,
-                                                cover_url,
-                                                video_url,
-                                                duration,
-                                                publish_time,
-                                                play_cnt,
-                                                crawler_rule,
-                                                width,
-                                                height)
-                                                values({our_video_id},
-                                                "{video_dict['profile_id']}",
-                                                "{cls.platform}",
-                                                "定向爬虫策略",
-                                                "{video_dict['video_id']}",
-                                                "{video_dict['video_title']}",
-                                                "{video_dict['cover_url']}",
-                                                "{video_dict['video_url']}",
-                                                {int(video_dict['duration'])},
-                                                "{video_dict['publish_time_str']}",
-                                                {int(video_dict['play_cnt'])},
-                                                '{json.dumps(rule_dict)}',
-                                                {int(video_dict['video_width'])},
-                                                {int(video_dict['video_height'])}) """
-                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-                MysqlHelper.update_values(log_type, crawler, insert_sql, env)
-                Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
-
-                # 视频写入飞书
-                Feishu.insert_columns(log_type, crawler, "Wu0CeL", "ROWS", 1, 2)
-                # 视频ID工作表,首行写入数据
-                upload_time = int(time.time())
-                values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                           "用户主页",
-                           str(video_dict['video_id']),
-                           str(video_dict['video_title']),
-                           our_video_link,
-                           video_dict['play_cnt'],
-                           video_dict['comment_cnt'],
-                           video_dict['like_cnt'],
-                           video_dict['share_cnt'],
-                           video_dict['duration'],
-                           f"{video_dict['video_width']}*{video_dict['video_height']}",
-                           str(video_dict['publish_time_str']),
-                           str(video_dict['user_name']),
-                           str(video_dict['profile_id']),
-                           str(video_dict['profile_mid']),
-                           str(video_dict['avatar_url']),
-                           str(video_dict['cover_url']),
-                           str(video_dict['video_url'])]]
-                time.sleep(1)
-                Feishu.update_values(log_type, crawler, "Wu0CeL", "F2:Z2", values)
-                Common.logger(log_type, crawler).info('视频信息写入飞书成功\n')
-
-        except Exception as e:
-            Common.logger(log_type, crawler).error("下载/上传异常:{}", e)
+                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+            Common.logger(log_type, crawler).info("视频上传完成")
+
+            if our_video_id is None:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
+                return
+
+            # 视频信息保存数据库
+            rule_dict = {
+                "duration": {"min": 40, "max": 100000000},
+                "play_cnt": {"min": 500}
+            }
+
+            insert_sql = f""" insert into crawler_video(video_id,
+                                            out_user_id,
+                                            platform,
+                                            strategy,
+                                            out_video_id,
+                                            video_title,
+                                            cover_url,
+                                            video_url,
+                                            duration,
+                                            publish_time,
+                                            play_cnt,
+                                            crawler_rule,
+                                            width,
+                                            height)
+                                            values({our_video_id},
+                                            "{video_dict['profile_id']}",
+                                            "{cls.platform}",
+                                            "定向爬虫策略",
+                                            "{video_dict['video_id']}",
+                                            "{video_dict['video_title']}",
+                                            "{video_dict['cover_url']}",
+                                            "{video_dict['video_url']}",
+                                            {int(video_dict['duration'])},
+                                            "{video_dict['publish_time_str']}",
+                                            {int(video_dict['play_cnt'])},
+                                            '{json.dumps(rule_dict)}',
+                                            {int(video_dict['video_width'])},
+                                            {int(video_dict['video_height'])}) """
+            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+            MysqlHelper.update_values(log_type, crawler, insert_sql, env)
+            Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
+
+            # 视频写入飞书
+            Feishu.insert_columns(log_type, crawler, "Wu0CeL", "ROWS", 1, 2)
+            # 视频ID工作表,首行写入数据
+            upload_time = int(time.time())
+            values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
+                       "用户主页",
+                       str(video_dict['video_id']),
+                       str(video_dict['video_title']),
+                       our_video_link,
+                       video_dict['play_cnt'],
+                       video_dict['comment_cnt'],
+                       video_dict['like_cnt'],
+                       video_dict['share_cnt'],
+                       video_dict['duration'],
+                       f"{video_dict['video_width']}*{video_dict['video_height']}",
+                       str(video_dict['publish_time_str']),
+                       str(video_dict['user_name']),
+                       str(video_dict['profile_id']),
+                       str(video_dict['profile_mid']),
+                       str(video_dict['avatar_url']),
+                       str(video_dict['cover_url']),
+                       str(video_dict['video_url'])]]
+            time.sleep(1)
+            Feishu.update_values(log_type, crawler, "Wu0CeL", "F2:Z2", values)
+            Common.logger(log_type, crawler).info('视频信息写入飞书成功\n')
+
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error("下载/上传异常:{}", e)
 
     # 获取所有关注列表的用户视频
     @classmethod
     def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env):
-        try:
-            # 已关注的用户列表 mids
-            user_list = cls.get_users()
-            for user in user_list:
+        # 已关注的用户列表 mids
+        user_list = cls.get_users()
+        for user in user_list:
+            try:
                 user_name = user['user_name']
                 profile_mid = user['profile_mid']
                 Common.logger(log_type, crawler).info(f"获取 {user_name} 主页视频")
@@ -436,8 +436,8 @@ class XiaoniangaoFollow:
                                   env=env)
                 cls.next_t = None
                 time.sleep(1)
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_follow_videos:{e}\n")
+            except Exception as e:
+                Common.logger(log_type, crawler).error(f"get_follow_videos:{e}\n")
 
 
 if __name__ == "__main__":

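In `download_publish` above, each candidate title is screened with `any(str(word) if str(word) in video_dict['video_title'] else False for word in filter_word(...))`. For non-empty filter words, that expression reduces to the plainer membership test below (a sketch only; the literal word list stands in for the `filter_word()` call):

```python
def hits_filter_words(title: str, filter_words) -> bool:
    # True if any configured filter word appears anywhere in the candidate title.
    return any(str(word) in title for word in filter_words)


print(hits_filter_words("重阳节祝福送给你", ["测试", "祝福"]))  # True -> the video is skipped
```
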
+ 59 - 59
xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py

@@ -471,65 +471,65 @@ class XiaoniangaoHour:
         """
         更新小时榜数据
         """
-        try:
-            befor_yesterday = (datetime.date.today() + datetime.timedelta(days=-3)).strftime("%Y-%m-%d %H:%M:%S")
-            update_time_stamp = int(time.mktime(time.strptime(befor_yesterday, "%Y-%m-%d %H:%M:%S")))
-            select_sql = f""" select * from crawler_xiaoniangao_hour where crawler_time_stamp >= {update_time_stamp} GROUP BY out_video_id """
-            update_video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env)
-            if len(update_video_list) == 0:
-                Common.logger(log_type, crawler).info("暂无需要更新的小时榜数据\n")
-                return
-            for update_video_info in update_video_list:
-                profile_id = update_video_info["profile_id"]
-                profile_mid = update_video_info["profile_mid"]
-                video_title = update_video_info["video_title"]
-                video_id = update_video_info["out_video_id"]
-                if datetime.datetime.now().hour == 10 and datetime.datetime.now().minute <= 10:
-                    video_info_dict = cls.get_video_info(log_type=log_type,
-                                                         crawler=crawler,
-                                                         p_id=profile_id,
-                                                         p_mid=profile_mid,
-                                                         v_title=video_title,
-                                                         v_id=video_id)
-                    ten_play_cnt = video_info_dict['play_cnt']
-                    Common.logger(log_type, crawler).info(f"ten_play_cnt:{ten_play_cnt}")
-                    update_sql = f""" update crawler_xiaoniangao_hour set ten_play_cnt={ten_play_cnt} WHERE out_video_id="{video_id}"; """
-                    # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
-                    MysqlHelper.update_values(log_type, crawler, update_sql, env)
-                    cls.download_publish(log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint,
-                                         env)
-                elif datetime.datetime.now().hour == 15 and datetime.datetime.now().minute <= 10:
-                    video_info_dict = cls.get_video_info(log_type=log_type,
-                                                         crawler=crawler,
-                                                         p_id=profile_id,
-                                                         p_mid=profile_mid,
-                                                         v_title=video_title,
-                                                         v_id=video_id)
-                    fifteen_play_cnt = video_info_dict['play_cnt']
-                    Common.logger(log_type, crawler).info(f"fifteen_play_cnt:{fifteen_play_cnt}")
-                    update_sql = f""" update crawler_xiaoniangao_hour set fifteen_play_cnt={fifteen_play_cnt} WHERE out_video_id="{video_id}"; """
-                    # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
-                    MysqlHelper.update_values(log_type, crawler, update_sql, env)
-                    cls.download_publish(log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint,
-                                         env)
-                elif datetime.datetime.now().hour == 20 and datetime.datetime.now().minute <= 10:
-                    video_info_dict = cls.get_video_info(log_type=log_type,
-                                                         crawler=crawler,
-                                                         p_id=profile_id,
-                                                         p_mid=profile_mid,
-                                                         v_title=video_title,
-                                                         v_id=video_id)
-                    twenty_play_cnt = video_info_dict['play_cnt']
-                    Common.logger(log_type, crawler).info(f"twenty_play_cnt:{twenty_play_cnt}")
-                    update_sql = f""" update crawler_xiaoniangao_hour set twenty_play_cnt={twenty_play_cnt} WHERE out_video_id="{video_id}"; """
-                    # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
-                    MysqlHelper.update_values(log_type, crawler, update_sql, env)
-                    cls.download_publish(log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint,
-                                         env)
-                else:
-                    pass
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"update_videoList:{e}\n")
+        # try:
+        befor_yesterday = (datetime.date.today() + datetime.timedelta(days=-3)).strftime("%Y-%m-%d %H:%M:%S")
+        update_time_stamp = int(time.mktime(time.strptime(befor_yesterday, "%Y-%m-%d %H:%M:%S")))
+        select_sql = f""" select * from crawler_xiaoniangao_hour where crawler_time_stamp >= {update_time_stamp} GROUP BY out_video_id """
+        update_video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env)
+        if len(update_video_list) == 0:
+            Common.logger(log_type, crawler).info("暂无需要更新的小时榜数据\n")
+            return
+        for update_video_info in update_video_list:
+            profile_id = update_video_info["profile_id"]
+            profile_mid = update_video_info["profile_mid"]
+            video_title = update_video_info["video_title"]
+            video_id = update_video_info["out_video_id"]
+            if datetime.datetime.now().hour == 10 and datetime.datetime.now().minute <= 10:
+                video_info_dict = cls.get_video_info(log_type=log_type,
+                                                     crawler=crawler,
+                                                     p_id=profile_id,
+                                                     p_mid=profile_mid,
+                                                     v_title=video_title,
+                                                     v_id=video_id)
+                ten_play_cnt = video_info_dict['play_cnt']
+                Common.logger(log_type, crawler).info(f"ten_play_cnt:{ten_play_cnt}")
+                update_sql = f""" update crawler_xiaoniangao_hour set ten_play_cnt={ten_play_cnt} WHERE out_video_id="{video_id}"; """
+                # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
+                MysqlHelper.update_values(log_type, crawler, update_sql, env)
+                cls.download_publish(log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint,
+                                     env)
+            elif datetime.datetime.now().hour == 15 and datetime.datetime.now().minute <= 10:
+                video_info_dict = cls.get_video_info(log_type=log_type,
+                                                     crawler=crawler,
+                                                     p_id=profile_id,
+                                                     p_mid=profile_mid,
+                                                     v_title=video_title,
+                                                     v_id=video_id)
+                fifteen_play_cnt = video_info_dict['play_cnt']
+                Common.logger(log_type, crawler).info(f"fifteen_play_cnt:{fifteen_play_cnt}")
+                update_sql = f""" update crawler_xiaoniangao_hour set fifteen_play_cnt={fifteen_play_cnt} WHERE out_video_id="{video_id}"; """
+                # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
+                MysqlHelper.update_values(log_type, crawler, update_sql, env)
+                cls.download_publish(log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint,
+                                     env)
+            elif datetime.datetime.now().hour == 20 and datetime.datetime.now().minute <= 10:
+                video_info_dict = cls.get_video_info(log_type=log_type,
+                                                     crawler=crawler,
+                                                     p_id=profile_id,
+                                                     p_mid=profile_mid,
+                                                     v_title=video_title,
+                                                     v_id=video_id)
+                twenty_play_cnt = video_info_dict['play_cnt']
+                Common.logger(log_type, crawler).info(f"twenty_play_cnt:{twenty_play_cnt}")
+                update_sql = f""" update crawler_xiaoniangao_hour set twenty_play_cnt={twenty_play_cnt} WHERE out_video_id="{video_id}"; """
+                # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
+                MysqlHelper.update_values(log_type, crawler, update_sql, env)
+                cls.download_publish(log_type, crawler, video_info_dict, update_video_info, strategy, oss_endpoint,
+                                     env)
+            else:
+                pass
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"update_videoList:{e}\n")
 
     @classmethod
     def download(cls, log_type, crawler, video_info_dict, strategy, oss_endpoint, env):

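`update_videoList` above selects rows crawled within the last three days and, around three fixed daily checkpoints, re-queries each video and writes the fresh play count into an hour-specific column. A small sketch of that window-to-column mapping (`hour_column` is a hypothetical helper; the column names match the UPDATE statements above):

```python
import datetime


def hour_column(now: datetime.datetime):
    # Within the first 10 minutes of 10:00, 15:00 or 20:00 the refreshed play
    # count is written to the matching column; outside those windows nothing is updated.
    windows = {10: "ten_play_cnt", 15: "fifteen_play_cnt", 20: "twenty_play_cnt"}
    if now.minute <= 10:
        return windows.get(now.hour)
    return None


print(hour_column(datetime.datetime(2023, 4, 18, 15, 7)))   # fifteen_play_cnt
print(hour_column(datetime.datetime(2023, 4, 18, 12, 30)))  # None
```
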
+ 147 - 147
xiaoniangao/xiaoniangao_play/xiaoniangao_play.py

@@ -149,165 +149,165 @@ class XiaoniangaoPlay:
             "wx_ver": "8.0.20",
             "code_ver": "3.62.0"
         }
-        try:
-            urllib3.disable_warnings()
-            r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
-            if "data" not in r.text or r.status_code != 200:
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}")
-                return
-            elif "data" not in r.json():
-                Common.logger(log_type, crawler).info(f"get_videoList:{r.json()}")
-                return
-            elif "list" not in r.json()["data"]:
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}")
-                return
-            elif len(r.json()["data"]["list"]) == 0:
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}")
-                return
-            else:
-                # 视频列表数据
-                feeds = r.json()["data"]["list"]
-                for i in range(len(feeds)):
-                    # 标题,表情随机加在片头、片尾,或替代句子中间的标点符号
-                    if "title" in feeds[i]:
-                        befor_video_title = feeds[i]["title"].strip().replace("\n", "") \
-                            .replace("/", "").replace("\r", "").replace("#", "") \
-                            .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
-                            .replace(":", "").replace("*", "").replace("?", "") \
-                            .replace("?", "").replace('"', "").replace("<", "") \
-                            .replace(">", "").replace("|", "").replace(" ", "").replace("#表情", "").replace("#符号", "").replace('"' ,'').replace("'", '')
-
-                        expression = cls.get_expression()
-                        expression_list = expression[0]
-                        char_list = expression[1]
-                        # 随机取一个表情
-                        expression = random.choice(expression_list)
-                        # 生成标题list[表情+title, title+表情]
-                        expression_title_list = [expression + befor_video_title, befor_video_title + expression]
-                        # 从标题list中随机取一个标题
-                        title_list1 = random.choice(expression_title_list)
-                        # 生成标题:原标题+符号
-                        title_list2 = befor_video_title + random.choice(char_list)
-                        # 表情和标题组合,与标题和符号组合,汇总成待使用的标题列表
-                        title_list4 = [title_list2, title_list1]
-                        # 最终标题
-                        video_title = random.choice(title_list4)
-                    else:
-                        video_title = 0
+        # try:
+        urllib3.disable_warnings()
+        r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
+        if "data" not in r.text or r.status_code != 200:
+            Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}")
+            return
+        elif "data" not in r.json():
+            Common.logger(log_type, crawler).info(f"get_videoList:{r.json()}")
+            return
+        elif "list" not in r.json()["data"]:
+            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}")
+            return
+        elif len(r.json()["data"]["list"]) == 0:
+            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}")
+            return
+        else:
+            # 视频列表数据
+            feeds = r.json()["data"]["list"]
+            for i in range(len(feeds)):
+                # 标题,表情随机加在片头、片尾,或替代句子中间的标点符号
+                if "title" in feeds[i]:
+                    befor_video_title = feeds[i]["title"].strip().replace("\n", "") \
+                        .replace("/", "").replace("\r", "").replace("#", "") \
+                        .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
+                        .replace(":", "").replace("*", "").replace("?", "") \
+                        .replace("?", "").replace('"', "").replace("<", "") \
+                        .replace(">", "").replace("|", "").replace(" ", "").replace("#表情", "").replace("#符号", "").replace('"' ,'').replace("'", '')
+
+                    expression = cls.get_expression()
+                    expression_list = expression[0]
+                    char_list = expression[1]
+                    # 随机取一个表情
+                    expression = random.choice(expression_list)
+                    # 生成标题list[表情+title, title+表情]
+                    expression_title_list = [expression + befor_video_title, befor_video_title + expression]
+                    # 从标题list中随机取一个标题
+                    title_list1 = random.choice(expression_title_list)
+                    # 生成标题:原标题+符号
+                    title_list2 = befor_video_title + random.choice(char_list)
+                    # 表情和标题组合,与标题和符号组合,汇总成待使用的标题列表
+                    title_list4 = [title_list2, title_list1]
+                    # 最终标题
+                    video_title = random.choice(title_list4)
+                else:
+                    video_title = 0
 
-                    # 视频 ID
-                    if "vid" in feeds[i]:
-                        video_id = feeds[i]["vid"]
-                    else:
-                        video_id = 0
+                # 视频 ID
+                if "vid" in feeds[i]:
+                    video_id = feeds[i]["vid"]
+                else:
+                    video_id = 0
 
-                    # 播放量
-                    if "play_pv" in feeds[i]:
-                        video_play_cnt = feeds[i]["play_pv"]
-                    else:
-                        video_play_cnt = 0
+                # 播放量
+                if "play_pv" in feeds[i]:
+                    video_play_cnt = feeds[i]["play_pv"]
+                else:
+                    video_play_cnt = 0
 
-                    # 评论量
-                    if "comment_count" in feeds[i]:
-                        video_comment_cnt = feeds[i]["comment_count"]
-                    else:
-                        video_comment_cnt = 0
+                # 评论量
+                if "comment_count" in feeds[i]:
+                    video_comment_cnt = feeds[i]["comment_count"]
+                else:
+                    video_comment_cnt = 0
 
-                    # 点赞量
-                    if "favor" in feeds[i]:
-                        video_like_cnt = feeds[i]["favor"]["total"]
-                    else:
-                        video_like_cnt = 0
+                # 点赞量
+                if "favor" in feeds[i]:
+                    video_like_cnt = feeds[i]["favor"]["total"]
+                else:
+                    video_like_cnt = 0
 
-                    # 分享量
-                    if "share" in feeds[i]:
-                        video_share_cnt = feeds[i]["share"]
-                    else:
-                        video_share_cnt = 0
+                # 分享量
+                if "share" in feeds[i]:
+                    video_share_cnt = feeds[i]["share"]
+                else:
+                    video_share_cnt = 0
 
-                    # 时长
-                    if "du" in feeds[i]:
-                        video_duration = int(feeds[i]["du"] / 1000)
-                    else:
-                        video_duration = 0
+                # 时长
+                if "du" in feeds[i]:
+                    video_duration = int(feeds[i]["du"] / 1000)
+                else:
+                    video_duration = 0
 
-                    # 宽和高
-                    if "w" or "h" in feeds[i]:
-                        video_width = feeds[i]["w"]
-                        video_height = feeds[i]["h"]
-                    else:
-                        video_width = 0
-                        video_height = 0
+                # 宽和高
+                if "w" or "h" in feeds[i]:
+                    video_width = feeds[i]["w"]
+                    video_height = feeds[i]["h"]
+                else:
+                    video_width = 0
+                    video_height = 0
 
-                    # 发布时间
-                    if "t" in feeds[i]:
-                        video_send_time = feeds[i]["t"]
-                    else:
-                        video_send_time = 0
-                    publish_time_stamp = int(int(video_send_time)/1000)
-                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-
-                    # 用户名 / 头像
-                    if "user" in feeds[i]:
-                        user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
-                            .replace("/", "").replace("快手", "").replace(" ", "") \
-                            .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-                        head_url = feeds[i]["user"]["hurl"]
-                    else:
-                        user_name = 0
-                        head_url = 0
+                # 发布时间
+                if "t" in feeds[i]:
+                    video_send_time = feeds[i]["t"]
+                else:
+                    video_send_time = 0
+                publish_time_stamp = int(int(video_send_time)/1000)
+                publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
+
+                # 用户名 / 头像
+                if "user" in feeds[i]:
+                    user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
+                        .replace("/", "").replace("快手", "").replace(" ", "") \
+                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
+                    head_url = feeds[i]["user"]["hurl"]
+                else:
+                    user_name = 0
+                    head_url = 0
 
-                    # 用户 ID
-                    profile_id = feeds[i]["id"]
+                # 用户 ID
+                profile_id = feeds[i]["id"]
 
-                    # 用户 mid
-                    profile_mid = feeds[i]["user"]["mid"]
+                # 用户 mid
+                profile_mid = feeds[i]["user"]["mid"]
 
-                    # 视频封面
-                    if "url" in feeds[i]:
-                        cover_url = feeds[i]["url"]
-                    else:
-                        cover_url = 0
+                # 视频封面
+                if "url" in feeds[i]:
+                    cover_url = feeds[i]["url"]
+                else:
+                    cover_url = 0
 
-                    # 视频播放地址
-                    if "v_url" in feeds[i]:
-                        video_url = feeds[i]["v_url"]
-                    else:
-                        video_url = 0
-
-                    video_dict = {
-                        "video_title": video_title,
-                        "video_id": video_id,
-                        "duration": video_duration,
-                        "play_cnt": video_play_cnt,
-                        "like_cnt": video_like_cnt,
-                        "comment_cnt": video_comment_cnt,
-                        "share_cnt": video_share_cnt,
-                        "user_name": user_name,
-                        "publish_time_stamp": publish_time_stamp,
-                        "publish_time_str": publish_time_str,
-                        "video_width": video_width,
-                        "video_height": video_height,
-                        "avatar_url": head_url,
-                        "profile_id": profile_id,
-                        "profile_mid": profile_mid,
-                        "cover_url": cover_url,
-                        "video_url": video_url,
-                        "session": f"xiaoniangao-play-{int(time.time())}"
-
-                    }
-                    for k, v in video_dict.items():
-                        Common.logger(log_type, crawler).info(f"{k}:{v}")
-
-                    cls.download_publish(log_type=log_type,
-                                         crawler=crawler,
-                                         video_dict=video_dict,
-                                         strategy=strategy,
-                                         oss_endpoint=oss_endpoint,
-                                         env=env)
-
-        except Exception as e:
-            Common.logger(log_type, crawler).error("get_play_feeds异常:{}", e)
+                # 视频播放地址
+                if "v_url" in feeds[i]:
+                    video_url = feeds[i]["v_url"]
+                else:
+                    video_url = 0
+
+                video_dict = {
+                    "video_title": video_title,
+                    "video_id": video_id,
+                    "duration": video_duration,
+                    "play_cnt": video_play_cnt,
+                    "like_cnt": video_like_cnt,
+                    "comment_cnt": video_comment_cnt,
+                    "share_cnt": video_share_cnt,
+                    "user_name": user_name,
+                    "publish_time_stamp": publish_time_stamp,
+                    "publish_time_str": publish_time_str,
+                    "video_width": video_width,
+                    "video_height": video_height,
+                    "avatar_url": head_url,
+                    "profile_id": profile_id,
+                    "profile_mid": profile_mid,
+                    "cover_url": cover_url,
+                    "video_url": video_url,
+                    "session": f"xiaoniangao-play-{int(time.time())}"
+
+                }
+                for k, v in video_dict.items():
+                    Common.logger(log_type, crawler).info(f"{k}:{v}")
+
+                cls.download_publish(log_type=log_type,
+                                     crawler=crawler,
+                                     video_dict=video_dict,
+                                     strategy=strategy,
+                                     oss_endpoint=oss_endpoint,
+                                     env=env)
+
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error("get_play_feeds异常:{}", e)
 
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, env):

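Much of `get_videoList` above is defensive field extraction: each feed key is tested with an `if "key" in feeds[i]` block and defaulted to 0 when missing. The same pattern collapses to `dict.get()` with defaults, sketched here on a made-up feed item:

```python
# A made-up feed item carrying the fields read above; "du" is in milliseconds.
feed = {"vid": 123, "play_pv": 456, "du": 30000, "w": 720, "h": 1280}

video_id = feed.get("vid", 0)
video_play_cnt = feed.get("play_pv", 0)
video_comment_cnt = feed.get("comment_count", 0)
video_share_cnt = feed.get("share", 0)
video_duration = int(feed.get("du", 0) / 1000)
video_width = feed.get("w", 0)
video_height = feed.get("h", 0)

print(video_id, video_duration, video_width, video_height)  # 123 30 720 1280
```
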
BIN
zhiqingtiantiankan/.DS_Store


+ 3 - 0
zhiqingtiantiankan/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/18

BIN
zhiqingtiantiankan/logs/.DS_Store


+ 3 - 0
zhiqingtiantiankan/logs/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/18

+ 3 - 0
zhiqingtiantiankan/zhiqingtiantiankan_main/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/18

+ 25 - 0
zhiqingtiantiankan/zhiqingtiantiankan_main/run_zhiqingtiantiankan_recommend.py

@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/18
+import argparse
+import os
+import sys
+sys.path.append(os.getcwd())
+from common.common import Common
+from zhiqingtiantiankan.zhiqingtiantiankan_recommend.zhiqingtiantiankan_recommend import ZhiqingtiantiankanRecommend
+
+
+def main(log_type, crawler, env):
+    Common.logger(log_type, crawler).info('开始抓取 知青天天看小程序\n')
+    ZhiqingtiantiankanRecommend.start_wechat(log_type, crawler, env)
+    Common.del_logs(log_type, crawler)
+    Common.logger(log_type, crawler).info('抓取完一轮\n')
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
+    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
+    parser.add_argument('--crawler')  ## 添加参数
+    parser.add_argument('--env')  ## 添加参数
+    args = parser.parse_args()  ### 参数赋值,也可以通过终端赋值
+    main(log_type=args.log_type, crawler=args.crawler, env=args.env)

BIN
zhiqingtiantiankan/zhiqingtiantiankan_recommend/.DS_Store


+ 3 - 0
zhiqingtiantiankan/zhiqingtiantiankan_recommend/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/18

+ 158 - 0
zhiqingtiantiankan/zhiqingtiantiankan_recommend/zhiqing_insert.py

@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/18
+import json
+import os
+import sys
+import time
+from datetime import date, timedelta
+from hashlib import md5
+
+sys.path.append(os.getcwd())
+from common.common import Common
+from common.feishu import Feishu
+from common.scheduling_db import MysqlHelper
+
+
+class Insert:
+    @classmethod
+    def get_config(cls, log_type, crawler, text, env):
+        select_sql = f"""select * from crawler_config where source="benshanzhufu" """
+        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
+        title_list = []
+        filter_list = []
+        for content in contents:
+            config = content['config']
+            config_dict = eval(config)
+            for k, v in config_dict.items():
+                if k == "title":
+                    title_list_config = v.split(",")
+                    for title in title_list_config:
+                        title_list.append(title)
+                if k == "filter":
+                    filter_list_config = v.split(",")
+                    for filter_word in filter_list_config:
+                        filter_list.append(filter_word)
+        if text == "title":
+            return title_list
+        elif text == "filter":
+            return filter_list
+
+    @classmethod
+    def before_day(cls):
+        publish_time_str_rule = (date.today() + timedelta(days=-30)).strftime("%Y-%m-%d %H:%M:%S")
+        publish_time_stamp_rule = int(time.mktime(time.strptime(publish_time_str_rule, "%Y-%m-%d %H:%M:%S")))
+        print(publish_time_str_rule)
+        print(publish_time_stamp_rule)
+
+    @classmethod
+    def insert_config(cls, log_type, crawler, env):
+        filter_sheet = Feishu.get_values_batch(log_type, crawler, "DjXfqG")
+        # title_sheet = Feishu.get_values_batch(log_type, crawler, "bHSW1p")
+        filter_list = []
+        # title_list = []
+        for x in filter_sheet:
+            for y in x:
+                if y is None:
+                    pass
+                else:
+                    filter_list.append(y)
+        # for x in title_sheet:
+        #     for y in x:
+        #         if y is None:
+        #             pass
+        #         else:
+        #             title_list.append(y)
+        # str_title = ','.join(title_list)
+        str_filter = ','.join(filter_list)
+        config_dict = {
+            # "title": str_title,
+            "filter": str_filter
+        }
+        str_config_dict = str(config_dict)
+        # print(f"config_dict:{config_dict}")
+        # print(f"str_config_dict:{str_config_dict}")
+        insert_sql = f""" insert into crawler_config(title, source, config) values("本山祝福小程序", "benshanzhufu", "{str_config_dict}") """
+        MysqlHelper.update_values(log_type, crawler, insert_sql, env)
+
+    @classmethod
+    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env):
+        zhiqing_sheetid = ['1a88b3']
+        for sheetid in zhiqing_sheetid:
+            xiaoniangao_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
+            for i in range(1, len(xiaoniangao_sheet)):
+            # for i in range(1, 5):
+                if xiaoniangao_sheet[i][5] is None or xiaoniangao_sheet[i][7] is None:
+                    continue
+                video_id = xiaoniangao_sheet[i][12].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace(
+                    "/info", "")
+                if video_id == "None":
+                    continue
+                video_id = int(video_id)
+                out_user_id = "zhiqingtiantiankan"
+                platform = "知青天天看"
+                strategy = "推荐榜爬虫策略"
+                video_title = str(xiaoniangao_sheet[i][7])
+                play_cnt = int(float(xiaoniangao_sheet[i][9].replace("阅读数", "").strip().split("万")[0])*10000)
+                duration = str(xiaoniangao_sheet[i][10])
+                width = int(xiaoniangao_sheet[i][11].split("*")[0])
+                height = int(xiaoniangao_sheet[i][11].split("*")[1])
+                cover_url = str(xiaoniangao_sheet[i][13])
+                video_url = str(xiaoniangao_sheet[i][14])
+                crawler_rule = json.dumps({})
+                out_video_id = md5(video_title.encode('utf8')).hexdigest()
+
+                # print(f"video_id:{video_id}, type:{type(video_id)}")
+                # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
+                # print(f"platform:{platform}, type:{type(platform)}")
+                # print(f"strategy:{strategy}, type:{type(strategy)}")
+                # print(f"video_title:{video_title}, type:{type(video_title)}")
+                # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
+                # print(f"video_url:{video_url}, type:{type(video_url)}")
+                # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
+
+                select_sql = f""" select * from crawler_video where platform="{platform}" and video_url="{video_url}" """
+                Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
+                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env)
+                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
+
+                if repeat_video is not None and len(repeat_video) != 0:
+                    Common.logger(log_type, crawler).info(f"{video_title} 已存在数据库中\n")
+                else:
+                    # 视频信息保存数据库
+                    insert_sql = f""" insert into crawler_video(video_id,
+                                        out_user_id,
+                                        platform,
+                                        strategy,
+                                        out_video_id,
+                                        video_title,
+                                        cover_url,
+                                        video_url,
+                                        duration,
+                                        play_cnt,
+                                        crawler_rule,
+                                        width,
+                                        height)
+                                        values({video_id},
+                                        "{out_user_id}",
+                                        "{platform}",
+                                        "{strategy}",
+                                        "{out_video_id}",
+                                        "{video_title}",
+                                        "{cover_url}",
+                                        "{video_url}",
+                                        {duration},
+                                        {play_cnt},
+                                        '{crawler_rule}',
+                                        {width},
+                                        {height}) """
+                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
+                    Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
+
+
+
+if __name__ == "__main__":
+    # Insert.insert_video_from_feishu_to_mysql("insert-dev", "zhiqingtiantiankan", "dev")
+    Insert.insert_video_from_feishu_to_mysql("insert-prod", "zhiqingtiantiankan", "prod")
+    pass
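
`insert_video_from_feishu_to_mysql` above reads play counts from sheet cells formatted like "阅读数3.2万": the prefix is stripped, the trailing "万" dropped, and the remainder scaled by 10,000. Isolated for clarity (the sample cell value is made up):

```python
cell = "阅读数3.2万"  # made-up sample cell
play_cnt = int(float(cell.replace("阅读数", "").strip().split("万")[0]) * 10000)
print(play_cnt)  # 32000
```

Note that the scaling assumes every cell ends in "万"; a plain value such as "阅读数3200" would also be multiplied by 10,000.
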

+ 410 - 0
zhiqingtiantiankan/zhiqingtiantiankan_recommend/zhiqingtiantiankan_recommend.py

@@ -0,0 +1,410 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/18
+import json
+import os
+import shutil
+import sys
+import time
+from hashlib import md5
+from appium import webdriver
+from appium.webdriver.common.touch_action import TouchAction
+from appium.webdriver.extensions.android.nativekey import AndroidKey
+from appium.webdriver.webdriver import WebDriver
+from selenium.common.exceptions import NoSuchElementException
+from selenium.webdriver.common.by import By
+sys.path.append(os.getcwd())
+from common.common import Common
+from common.feishu import Feishu
+from common.publish import Publish
+from common.scheduling_db import MysqlHelper
+
+
+class ZhiqingtiantiankanRecommend:
+    platform = "知青天天看"
+    i = 0
+
+    @classmethod
+    def zhiqingtiantiankan_config(cls, log_type, crawler, text, env):
+        select_sql = f"""select * from crawler_config where source="zhiqingtiantiankan" """
+        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
+        title_list = []
+        filter_list = []
+        for content in contents:
+            config = content['config']
+            config_dict = eval(config)
+            for k, v in config_dict.items():
+                if k == "title":
+                    title_list_config = v.split(",")
+                    for title in title_list_config:
+                        title_list.append(title)
+                if k == "filter":
+                    filter_list_config = v.split(",")
+                    for filter_word in filter_list_config:
+                        filter_list.append(filter_word)
+        if text == "title":
+            return title_list
+        elif text == "filter":
+            return filter_list
+
+    @classmethod
+    def start_wechat(cls, log_type, crawler, env):
+        # try:
+        Common.logger(log_type, crawler).info('启动微信')
+        if env == "dev":
+            chromedriverExecutable = '/Users/wangkun/Downloads/chromedriver/chromedriver_v107/chromedriver'
+        else:
+            chromedriverExecutable = '/Users/piaoquan/Downloads/chromedriver'
+        caps = {
+            "platformName": "Android",  # 手机操作系统 Android / iOS
+            "deviceName": "Android",  # 连接的设备名(模拟器或真机),安卓可以随便写
+            "platforVersion": "11",  # 手机对应的系统版本(Android 11)
+            "appPackage": "com.tencent.mm",  # 被测APP的包名,乐活圈 Android
+            "appActivity": ".ui.LauncherUI",  # 启动的Activity名
+            "autoGrantPermissions": "true",  # 让 appium 自动授权 base 权限,
+            # 如果 noReset 为 True,则该条不生效(该参数为 Android 独有),对应的值为 True 或 False
+            "unicodekeyboard": True,  # 使用自带输入法,输入中文时填True
+            "resetkeyboard": True,  # 执行完程序恢复原来输入法
+            "noReset": True,  # 不重置APP
+            "printPageSourceOnFailure": True,  # 找不到元素时,appium log 会完整记录当前页面的 pagesource
+            "newCommandTimeout": 6000,  # 初始等待时间
+            "automationName": "UiAutomator2",  # 使用引擎,默认为 Appium,
+            # 其中 Appium、UiAutomator2、Selendroid、Espresso 用于 Android,XCUITest 用于 iOS
+            "showChromedriverLog": True,
+            'enableWebviewDetailsCollection': True,
+            'setWebContentsDebuggingEnabled': True,
+            'recreateChromeDriverSessions': True,
+            'chromedriverExecutable': chromedriverExecutable,
+            "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
+            # "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
+            'browserName': ''
+        }
+        driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
+        driver.implicitly_wait(20)
+        # 向下滑动页面,展示出小程序选择面板
+        for i in range(120):
+            try:
+                # 发现微信消息 TAB,代表微信已启动成功
+                if driver.find_elements(By.ID, 'com.tencent.mm:id/f2s'):
+                    break
+                # 发现并关闭系统菜单栏
+                elif driver.find_element(By.ID, 'com.android.systemui:id/dismiss_view'):
+                    Common.logger(log_type, crawler).info('发现并关闭系统下拉菜单栏')
+                    driver.find_element(By.ID, 'com.android.systemui:id/dismiss_view').click()
+                else:
+                    pass
+            except NoSuchElementException:
+                time.sleep(1)
+        Common.logger(log_type, crawler).info('下滑,展示小程序选择面板')
+        size = driver.get_window_size()
+        driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2), int(size['width'] * 0.5),
+                     int(size['height'] * 0.8), 200)
+        # 打开小程序"知青天天看"
+        time.sleep(5)
+        Common.logger(log_type, crawler).info('打开小程序"知青天天看"')
+        driver.find_elements(By.XPATH, '//*[@text="知青天天看"]')[-1].click()
+
+        # 获取视频信息
+        time.sleep(5)
+        cls.get_videoList(log_type, crawler, driver, env)
+
+        # 退出微信
+        cls.quit(log_type, crawler, driver)
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error('start_wechat异常:{}\n', e)
+
+    # 退出 APP
+    @classmethod
+    def quit(cls, log_type, crawler, driver: WebDriver):
+        driver.quit()
+        Common.logger(log_type, crawler).info('退出微信APP成功\n')
+
+    # 切换 Handle
+    @classmethod
+    def search_elements(cls, driver: WebDriver, xpath):
+        windowHandles = driver.window_handles
+        for handle in windowHandles:
+            driver.switch_to.window(handle)
+            time.sleep(1)
+            try:
+                elements = driver.find_elements(By.XPATH, xpath)
+                if elements:
+                    return elements
+            except NoSuchElementException:
+                pass
+
+    @classmethod
+    def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
+        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{out_video_id}"; """
+        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
+        return len(repeat_video)
+
+    @classmethod
+    def repeat_video_url(cls, log_type, crawler, video_url, env):
+        sql = f""" select * from crawler_video where platform="{cls.platform}" and video_url="{video_url}"; """
+        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
+        return len(repeat_video)
+
+    @classmethod
+    def check_to_applet(cls, log_type, crawler, driver: WebDriver):
+        while True:
+            webview = driver.contexts
+            driver.switch_to.context(webview[1])
+            windowHandles = driver.window_handles
+            for handle in windowHandles:
+                driver.switch_to.window(handle)
+                time.sleep(1)
+                try:
+                    applet = driver.find_element(By.XPATH, '//wx-view[@class="u-title navbar--u-title u-line-1 navbar--u-line-1 data-v-febd4d40 navbar--data-v-febd4d40"]')
+                    if applet:
+                        Common.logger(log_type, crawler).info('切换到小程序成功\n')
+                        return
+                except NoSuchElementException:
+                    time.sleep(1)
+            Common.logger(log_type, crawler).info("切换到小程序失败\n")
+            break
+
+    @classmethod
+    def find_ad(cls, log_type, crawler, driver: WebDriver):
+        windowHandles = driver.window_handles
+        # Common.logger(log_type, crawler).info('windowHandles:{}', windowHandles)
+        # 遍历所有的handles,找到当前页面所在的handle:如果pageSource有包含你想要的元素,就是所要找的handle
+        # 小程序的页面来回切换也需要:遍历所有的handles,切换到元素所在的handle
+        for handle in windowHandles:
+            driver.switch_to.window(handle)
+            time.sleep(1)
+            try:
+                Common.logger(log_type, crawler).info("寻找广告~~~~~~")
+                ad_element = driver.find_element(By.XPATH, '//div[@class="ad-text"]')
+                if ad_element:
+                    Common.logger(log_type, crawler).info("发现广告")
+                    for i in range(20):
+                        if driver.find_element(By.XPATH, '//div[@id="count_down_container"]/*[1]').text == "已完成浏览":
+                            Common.logger(log_type, crawler).info("广告播放完毕,点击返回")
+                            driver.press_keycode(AndroidKey.BACK)
+                            return
+                        else:
+                            Common.logger(log_type, crawler).info("广告未播放完毕,等待 1 秒")
+                            time.sleep(1)
+                else:
+                    Common.logger(log_type, crawler).info("未发现广告, 退出")
+                    return
+            except NoSuchElementException:
+                time.sleep(1)
+
+    @classmethod
+    def get_video_url(cls, log_type, crawler, driver: WebDriver, video_element):
+        video_element.click()
+        time.sleep(5)
+        cls.close_native_ad(log_type, crawler, driver)
+        windowHandles = driver.window_handles
+        for handle in windowHandles:
+            driver.switch_to.window(handle)
+            time.sleep(1)
+            try:
+                video_url_element = driver.find_element(By.XPATH, '//*[@class="wx-swiper-slide-frame"]/*[2]//*[@class="video_item videoswiper--video_item"]')
+                video_url = video_url_element.get_attribute("src")
+                # cls.find_ad(log_type, crawler, driver)
+                return video_url
+            except NoSuchElementException:
+                time.sleep(1)
+
+    @classmethod
+    def close_native_ad(cls, log_type, crawler, driver: WebDriver):
+        Common.logger(log_type, crawler).info('关闭广告')
+        size = driver.get_window_size()
+        time.sleep(3)
+        TouchAction(driver).tap(x=int(size['width'] * 0.4), y=int(size['height'] * 0.1)).perform()
+
+
+    @classmethod
+    def get_videoList(cls, log_type, crawler, driver: WebDriver, env):
+        # 关闭广告
+        cls.close_native_ad(log_type, crawler, driver)
+        # 切换到小程序
+        cls.check_to_applet(log_type, crawler, driver)
+
+        time.sleep(5)
+        index = 0
+        while True:
+            try:
+                if cls.search_elements(driver, '//wx-view[@class="listbox"]') is None:
+                    Common.logger(log_type, crawler).info('窗口已销毁\n')
+                    return
+
+                Common.logger(log_type, crawler).info('获取视频列表\n')
+                video_elements = cls.search_elements(driver, '//wx-view[@class="videolist-box videolist--videolist-box"]')
+                if video_elements is None:
+                    Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
+                    return
+
+                video_element_temp = video_elements[index:]
+                if len(video_element_temp) == 0:
+                    Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
+                    return
+
+                for i, video_element in enumerate(video_element_temp):
+                    if video_element is None:
+                        Common.logger(log_type, crawler).info('到底啦~\n')
+                        return
+                    cls.i += 1
+                    cls.search_elements(driver, '//wx-view[@class="videolist-box videolist--videolist-box"]')
+
+                    Common.logger(log_type, crawler).info(f'拖动"视频"列表第{cls.i}个至屏幕中间')
+                    time.sleep(3)
+                    driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})", video_element)
+
+                    # video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="video_title videolist--video_title"]')[cls.i-1].text
+                    # cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="itemimage videolist--itemimage"]')[cls.i-1].get_attribute('src')
+                    # play_cnt = video_element.find_elements(By.XPATH, '//wx-view[@class="clickbox videolist--clickbox"]')[cls.i-1].text
+
+                    video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="video_title videolist--video_title"]')[index+i].text
+                    cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="itemimage videolist--itemimage"]')[index+i].get_attribute('src')
+                    play_cnt = video_element.find_elements(By.XPATH, '//wx-view[@class="clickbox videolist--clickbox"]')[index+i].text
+
+                    play_cnt = play_cnt.replace("阅读数", "").strip()
+                    # "X万" 代表万级播放量;无"万"时直接取整,避免把原始数值误乘 10000
+                    play_cnt = int(float(play_cnt.split("万")[0]) * 10000) if "万" in play_cnt else int(float(play_cnt or 0))
+                    out_video_id = md5(video_title.encode('utf8')).hexdigest()
+                    video_dict = {
+                        'video_title': video_title,
+                        'video_id': out_video_id,
+                        'play_cnt': play_cnt,
+                        'comment_cnt': 0,
+                        'like_cnt': 0,
+                        'share_cnt': 0,
+                        'publish_time_stamp': int(time.time()),
+                        'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
+                        'user_name': "zhiqingtiantiankan",
+                        'user_id': "zhiqingtiantiankan",
+                        'avatar_url': cover_url,
+                        'cover_url': cover_url,
+                        'session': f"zhiqingtiantiankan-{int(time.time())}"
+                    }
+                    for k, v in video_dict.items():
+                        Common.logger(log_type, crawler).info(f"{k}:{v}")
+
+                    if video_title is None or cover_url is None:
+                        Common.logger(log_type, crawler).info("无效视频\n")
+                    elif any(str(word) if str(word) in video_title else False for word in
+                             cls.zhiqingtiantiankan_config(log_type, crawler, "filter", env)) is True:
+                        Common.logger(log_type, crawler).info('已中过滤词\n')
+                    elif cls.repeat_out_video_id(log_type, crawler, out_video_id, env) != 0:
+                        Common.logger(log_type, crawler).info('视频已下载\n')
+                    else:
+                        video_url = cls.get_video_url(log_type, crawler, driver, video_element)
+
+                        if video_url is None:
+                            Common.logger(log_type, crawler).info("未获取到视频播放地址\n")
+                            driver.press_keycode(AndroidKey.BACK)
+                        elif cls.repeat_video_url(log_type, crawler, video_url, env) != 0:
+                            Common.logger(log_type, crawler).info('视频已下载\n')
+                            driver.press_keycode(AndroidKey.BACK)
+                        else:
+                            video_dict["video_url"] = video_url
+                            Common.logger(log_type, crawler).info(f"video_url:{video_url}\n")
+                            # driver.press_keycode(AndroidKey.BACK)
+                            cls.download_publish(log_type, crawler, video_dict, env, driver)
+                Common.logger(log_type, crawler).info('已抓取完一组视频,休眠10秒\n')
+                time.sleep(10)
+                index = index + len(video_element_temp)
+            except Exception as e:
+                Common.logger(log_type, crawler).info(f"get_videoList:{e}\n")
+                cls.i = 0
+
+    @classmethod
+    def download_publish(cls, log_type, crawler, video_dict, env, driver: WebDriver):
+        # 下载视频
+        Common.download_method(log_type=log_type, crawler=crawler, text='video', title=video_dict['video_title'],
+                               url=video_dict['video_url'])
+        ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
+        if ffmpeg_dict is None:
+            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
+            shutil.rmtree(f"./{crawler}/videos/{md_title}/")
+            Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+            return
+        video_dict["duration"] = ffmpeg_dict["duration"]
+        video_dict["video_width"] = ffmpeg_dict["width"]
+        video_dict["video_height"] = ffmpeg_dict["height"]
+        # 下载封面
+        Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'],
+                               url=video_dict['cover_url'])
+        # 保存视频信息至txt
+        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
+
+        # 上传视频
+        Common.logger(log_type, crawler).info("开始上传视频...")
+        our_video_id = Publish.upload_and_publish(log_type=log_type,
+                                                  crawler=crawler,
+                                                  strategy="推荐榜爬虫策略",
+                                                  our_uid="recommend",
+                                                  env=env,
+                                                  oss_endpoint="out")
+        if env == 'dev':
+            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        else:
+            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        Common.logger(log_type, crawler).info("视频上传完成")
+
+        if our_video_id is None:
+            # 删除视频文件夹
+            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
+            return
+
+        # 视频信息保存至飞书
+        Feishu.insert_columns(log_type, crawler, "1a88b3", "ROWS", 1, 2)
+        # 视频ID工作表,首行写入数据
+        upload_time = int(time.time())
+        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
+                   "推荐榜爬虫策略",
+                   video_dict["video_title"],
+                   video_dict["video_id"],
+                   video_dict["play_cnt"],
+                   video_dict["duration"],
+                   f'{video_dict["video_width"]}*{video_dict["video_height"]}',
+                   our_video_link,
+                   video_dict["cover_url"],
+                   video_dict["video_url"]]]
+        time.sleep(1)
+        Feishu.update_values(log_type, crawler, "1a88b3", "F2:V2", values)
+        Common.logger(log_type, crawler).info(f"视频已保存至飞书文档\n")
+
+        rule_dict = {}
+        # 视频信息保存数据库
+        insert_sql = f""" insert into crawler_video(video_id,
+                                                            out_user_id,
+                                                            platform,
+                                                            strategy,
+                                                            out_video_id,
+                                                            video_title,
+                                                            cover_url,
+                                                            video_url,
+                                                            duration,
+                                                            publish_time,
+                                                            play_cnt,
+                                                            crawler_rule,
+                                                            width,
+                                                            height)
+                                                            values({our_video_id},
+                                                            "{video_dict['user_id']}",
+                                                            "{cls.platform}",
+                                                            "推荐榜爬虫策略",
+                                                            "{video_dict['video_id']}",
+                                                            "{video_dict['video_title']}",
+                                                            "{video_dict['cover_url']}",
+                                                            "{video_dict['video_url']}",
+                                                            {int(video_dict['duration'])},
+                                                            "{video_dict['publish_time_str']}",
+                                                            {int(video_dict['play_cnt'])},
+                                                            '{json.dumps(rule_dict)}',
+                                                            {int(video_dict['video_width'])},
+                                                            {int(video_dict['video_height'])}) """
+        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
+        Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
+        driver.press_keycode(AndroidKey.BACK)
+
+
+if __name__ == '__main__':
+    # ZhiqingtiantiankanRecommend.start_wechat('recommend', 'zhiqingtiantiankan', 'prod')
+    print(ZhiqingtiantiankanRecommend.zhiqingtiantiankan_config('recommend', 'zhiqingtiantiankan', 'filter', 'dev'))
+
+    pass
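The recommend crawler above reads play counts as text such as "阅读数1.2万" and converts them to integers. Below is a minimal standalone sketch of that normalization, assuming the same text format; the helper name is illustrative and not part of the repo:

```python
# -*- coding: utf-8 -*-
def parse_play_cnt(raw: str) -> int:
    """Convert strings like "阅读数1.2万" or "532" into an integer play count (illustrative helper)."""
    text = raw.replace("阅读数", "").strip()
    if not text:
        return 0
    if "万" in text:
        # "1.2万" -> 12000
        return int(float(text.split("万")[0]) * 10000)
    return int(float(text))


assert parse_play_cnt("阅读数1.2万") == 12000
assert parse_play_cnt("532") == 532
```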

BIN
zhongmiaoyinxin/.DS_Store


+ 3 - 0
zhongmiaoyinxin/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17

BIN
zhongmiaoyinxin/logs/.DS_Store


+ 3 - 0
zhongmiaoyinxin/logs/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/18

+ 3 - 0
zhongmiaoyinxin/zhongmiaoyinxin_main/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17

+ 25 - 0
zhongmiaoyinxin/zhongmiaoyinxin_main/run_zhongmiaoyinxin_recommend.py

@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17
+import argparse
+import os
+import sys
+sys.path.append(os.getcwd())
+from common.common import Common
+from zhongmiaoyinxin.zhongmiaoyinxin_recommend.zhongmiaoyinxin_recommend import ZhongmiaoyinxinRecommend
+
+
+def main(log_type, crawler, env):
+    Common.logger(log_type, crawler).info('开始抓取 众妙音信小程序\n')
+    ZhongmiaoyinxinRecommend.start_wechat(log_type, crawler, env)
+    Common.del_logs(log_type, crawler)
+    Common.logger(log_type, crawler).info('抓取完一轮\n')
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  ## 新建参数解析器对象
+    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
+    parser.add_argument('--crawler')  ## 添加参数
+    parser.add_argument('--env')  ## 添加参数
+    args = parser.parse_args()  ### 参数赋值,也可以通过终端赋值
+    main(log_type=args.log_type, crawler=args.crawler, env=args.env)
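run_zhongmiaoyinxin_recommend.py only wires the argparse flags into main(); run from the repository root (the script relies on sys.path.append(os.getcwd())), a manual invocation would look roughly like this, with illustrative flag values:

```commandline
python3 zhongmiaoyinxin/zhongmiaoyinxin_main/run_zhongmiaoyinxin_recommend.py --log_type="recommend" --crawler="zhongmiaoyinxin" --env="prod"
```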

BIN
zhongmiaoyinxin/zhongmiaoyinxin_recommend/.DS_Store


+ 3 - 0
zhongmiaoyinxin/zhongmiaoyinxin_recommend/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17

+ 158 - 0
zhongmiaoyinxin/zhongmiaoyinxin_recommend/insert.py

@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17
+import json
+import os
+import sys
+import time
+from datetime import date, timedelta
+from hashlib import md5
+
+sys.path.append(os.getcwd())
+from common.common import Common
+from common.feishu import Feishu
+from common.scheduling_db import MysqlHelper
+
+
+class Insert:
+    @classmethod
+    def get_config(cls, log_type, crawler, text, env):
+        select_sql = f"""select * from crawler_config where source="benshanzhufu" """
+        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
+        title_list = []
+        filter_list = []
+        for content in contents:
+            config = content['config']
+            config_dict = eval(config)
+            for k, v in config_dict.items():
+                if k == "title":
+                    title_list_config = v.split(",")
+                    for title in title_list_config:
+                        title_list.append(title)
+                if k == "filter":
+                    filter_list_config = v.split(",")
+                    for filter_word in filter_list_config:
+                        filter_list.append(filter_word)
+        if text == "title":
+            return title_list
+        elif text == "filter":
+            return filter_list
+
+    @classmethod
+    def before_day(cls):
+        publish_time_str_rule = (date.today() + timedelta(days=-30)).strftime("%Y-%m-%d %H:%M:%S")
+        publish_time_stamp_rule = int(time.mktime(time.strptime(publish_time_str_rule, "%Y-%m-%d %H:%M:%S")))
+        print(publish_time_str_rule)
+        print(publish_time_stamp_rule)
+
+    @classmethod
+    def insert_config(cls, log_type, crawler, env):
+        filter_sheet = Feishu.get_values_batch(log_type, crawler, "DjXfqG")
+        # title_sheet = Feishu.get_values_batch(log_type, crawler, "bHSW1p")
+        filter_list = []
+        # title_list = []
+        for x in filter_sheet:
+            for y in x:
+                if y is None:
+                    pass
+                else:
+                    filter_list.append(y)
+        # for x in title_sheet:
+        #     for y in x:
+        #         if y is None:
+        #             pass
+        #         else:
+        #             title_list.append(y)
+        # str_title = ','.join(title_list)
+        str_filter = ','.join(filter_list)
+        config_dict = {
+            # "title": str_title,
+            "filter": str_filter
+        }
+        str_config_dict = str(config_dict)
+        # print(f"config_dict:{config_dict}")
+        # print(f"str_config_dict:{str_config_dict}")
+        insert_sql = f""" insert into crawler_config(title, source, config) values("本山祝福小程序", "benshanzhufu", "{str_config_dict}") """
+        MysqlHelper.update_values(log_type, crawler, insert_sql, env)
+
+    @classmethod
+    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env):
+        zhongmiaoyinxin_sheetid = ['19c772']
+        for sheetid in zhongmiaoyinxin_sheetid:
+            zhongmiaoyinxin_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
+            for i in range(1, len(zhongmiaoyinxin_sheet)):
+            # for i in range(1, 5):
+                if zhongmiaoyinxin_sheet[i][5] is None or zhongmiaoyinxin_sheet[i][7] is None:
+                    continue
+                video_id = zhongmiaoyinxin_sheet[i][12].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace(
+                    "/info", "")
+                if video_id == "None":
+                    continue
+                video_id = int(video_id)
+                out_user_id = "zhongmiaoyinxin"
+                platform = "众妙音信"
+                strategy = "推荐榜爬虫策略"
+                video_title = str(zhongmiaoyinxin_sheet[i][7])
+                play_cnt_str = str(zhongmiaoyinxin_sheet[i][9])
+                # "X万" 需先转 float 再乘 10000;纯数字直接取整
+                play_cnt = int(float(play_cnt_str.split("万")[0]) * 10000) if "万" in play_cnt_str else int(float(play_cnt_str or 0))
+                duration = str(zhongmiaoyinxin_sheet[i][10])
+                width = int(zhongmiaoyinxin_sheet[i][11].split("*")[0])
+                height = int(zhongmiaoyinxin_sheet[i][11].split("*")[1])
+                cover_url = str(zhongmiaoyinxin_sheet[i][13])
+                video_url = str(zhongmiaoyinxin_sheet[i][14])
+                crawler_rule = json.dumps({})
+                out_video_id = md5(video_title.encode('utf8')).hexdigest()
+
+                # print(f"video_id:{video_id}, type:{type(video_id)}")
+                # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
+                # print(f"platform:{platform}, type:{type(platform)}")
+                # print(f"strategy:{strategy}, type:{type(strategy)}")
+                # print(f"video_title:{video_title}, type:{type(video_title)}")
+                # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
+                # print(f"video_url:{video_url}, type:{type(video_url)}")
+                # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
+
+                select_sql = f""" select * from crawler_video where platform="{platform}" and video_url="{video_url}" """
+                Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
+                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env)
+                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
+
+                if repeat_video is not None and len(repeat_video) != 0:
+                    Common.logger(log_type, crawler).info(f"{video_title} 已存在数据库中\n")
+                else:
+                    # 视频信息保存数据库
+                    insert_sql = f""" insert into crawler_video(video_id,
+                                        out_user_id,
+                                        platform,
+                                        strategy,
+                                        out_video_id,
+                                        video_title,
+                                        cover_url,
+                                        video_url,
+                                        duration,
+                                        play_cnt,
+                                        crawler_rule,
+                                        width,
+                                        height)
+                                        values({video_id},
+                                        "{out_user_id}",
+                                        "{platform}",
+                                        "{strategy}",
+                                        "{out_video_id}",
+                                        "{video_title}",
+                                        "{cover_url}",
+                                        "{video_url}",
+                                        {duration},
+                                        {play_cnt},
+                                        '{crawler_rule}',
+                                        {width},
+                                        {height}) """
+                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
+                    Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
+
+
+
+if __name__ == "__main__":
+    Insert.insert_video_from_feishu_to_mysql("insert-dev", "zhongmiaoyinxin", "dev")
+    # Insert.insert_video_from_feishu_to_mysql("insert-prod", "zhongmiaoyinxin", "prod")
+    pass
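For reference, insert_config stores the Feishu filter words as str(dict) in crawler_config, and get_config reads them back with eval; a self-contained sketch of that round-trip with made-up values:

```python
# Round-trip of the crawler_config "config" column (values are made up for illustration).
config_dict = {"filter": "过滤词A,过滤词B,过滤词C"}   # built by joining the Feishu filter sheet with ","
stored = str(config_dict)                            # what insert_config writes into the config column
loaded = eval(stored)                                # what get_config / zhongmiaoyinxin_config do when reading back
filter_list = loaded["filter"].split(",")
assert filter_list == ["过滤词A", "过滤词B", "过滤词C"]
```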

+ 403 - 0
zhongmiaoyinxin/zhongmiaoyinxin_recommend/zhongmiaoyinxin_recommend.py

@@ -0,0 +1,403 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/17
+import json
+import os
+import shutil
+import sys
+import time
+from hashlib import md5
+from appium import webdriver
+from appium.webdriver.common.touch_action import TouchAction
+from appium.webdriver.extensions.android.nativekey import AndroidKey
+from appium.webdriver.webdriver import WebDriver
+from selenium.common import NoSuchElementException
+from selenium.webdriver.common.by import By
+sys.path.append(os.getcwd())
+from common.common import Common
+from common.publish import Publish
+from common.feishu import Feishu
+from common.scheduling_db import MysqlHelper
+
+
+class ZhongmiaoyinxinRecommend:
+    platform = "众妙音信"
+    i = 0
+
+    @classmethod
+    def zhongmiaoyinxin_config(cls, log_type, crawler, text, env):
+        select_sql = f"""select * from crawler_config where source="zhongmiaoyinxin" """
+        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
+        title_list = []
+        filter_list = []
+        for content in contents:
+            config = content['config']
+            config_dict = eval(config)
+            for k, v in config_dict.items():
+                if k == "title":
+                    title_list_config = v.split(",")
+                    for title in title_list_config:
+                        title_list.append(title)
+                if k == "filter":
+                    filter_list_config = v.split(",")
+                    for filter_word in filter_list_config:
+                        filter_list.append(filter_word)
+        if text == "title":
+            return title_list
+        elif text == "filter":
+            return filter_list
+
+    @classmethod
+    def start_wechat(cls, log_type, crawler, env):
+        try:
+            if env == "dev":
+                chromedriverExecutable = "/Users/wangkun/Downloads/chromedriver/chromedriver_v107/chromedriver"
+            else:
+                chromedriverExecutable = '/Users/piaoquan/Downloads/chromedriver'
+            Common.logger(log_type, crawler).info('启动微信')
+            caps = {
+                "platformName": "Android",  # 手机操作系统 Android / iOS
+                "deviceName": "a0a65126",  # 连接的设备名(模拟器或真机),安卓可以随便写
+                "platforVersion": "11",  # 手机对应的系统版本(Android 11)
+                "appPackage": "com.tencent.mm",  # 被测APP的包名,乐活圈 Android
+                "appActivity": ".ui.LauncherUI",  # 启动的Activity名
+                "autoGrantPermissions": "true",  # 让 appium 自动授权 base 权限,
+                # 如果 noReset 为 True,则该条不生效(该参数为 Android 独有),对应的值为 True 或 False
+                "unicodekeyboard": True,  # 使用自带输入法,输入中文时填True
+                "resetkeyboard": True,  # 执行完程序恢复原来输入法
+                "noReset": True,  # 不重置APP
+                "printPageSourceOnFailure": True,  # 找不到元素时,appium log 会完整记录当前页面的 pagesource
+                "newCommandTimeout": 6000,  # 初始等待时间
+                "automationName": "UiAutomator2",  # 使用引擎,默认为 Appium,
+                # 其中 Appium、UiAutomator2、Selendroid、Espresso 用于 Android,XCUITest 用于 iOS
+                "showChromedriverLog": True,
+                'enableWebviewDetailsCollection': True,
+                'setWebContentsDebuggingEnabled': True,
+                'recreateChromeDriverSessions': True,
+                'chromedriverExecutable': chromedriverExecutable,
+                "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
+                # "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
+                'browserName': ''
+            }
+            driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
+            driver.implicitly_wait(20)
+            # 等待微信启动成功(最多轮询 120 次)
+            for i in range(120):
+                try:
+                    # 发现微信消息 TAB,代表微信已启动成功
+                    if driver.find_elements(By.ID, 'com.tencent.mm:id/f2s'):
+                        break
+                    # 发现并关闭系统菜单栏
+                    elif driver.find_element(By.ID, 'com.android.systemui:id/dismiss_view'):
+                        Common.logger(log_type, crawler).info('发现并关闭系统下拉菜单栏')
+                        driver.find_element(By.ID, 'com.android.systemui:id/dismiss_view').click()
+                    else:
+                        pass
+                except NoSuchElementException:
+                    time.sleep(1)
+            Common.logger(log_type, crawler).info('下滑,展示小程序选择面板')
+            size = driver.get_window_size()
+            driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2),
+                         int(size['width'] * 0.5), int(size['height'] * 0.8), 200)
+            # 打开小程序"众妙之上"
+            time.sleep(5)
+            Common.logger(log_type, crawler).info('打开小程序"火山趣事"')
+            driver.find_elements(By.XPATH, '//*[@text="火山趣事"]')[-1].click()
+
+            time.sleep(5)
+            cls.get_videoList(log_type, crawler, driver, env)
+
+            cls.quit(log_type, crawler, driver)
+        except Exception as e:
+            Common.logger(log_type, crawler).error('start_wechat异常:{}\n', e)
+
+    @classmethod
+    def quit(cls, log_type, crawler, driver: WebDriver):
+        driver.quit()
+        Common.logger(log_type, crawler).info('退出 APP 成功\n')
+
+    @classmethod
+    def check_to_applet(cls, log_type, crawler, driver: WebDriver):
+        while True:
+            webview = driver.contexts
+            driver.switch_to.context(webview[1])
+            windowHandles = driver.window_handles
+            for handle in windowHandles:
+                driver.switch_to.window(handle)
+                time.sleep(1)
+                try:
+                    video_list = driver.find_element(By.XPATH, '//*[@class="index--navbar-list"]/*[1]')
+                    video_list.click()
+                    Common.logger(log_type, crawler).info('切换到小程序视频列表成功\n')
+                    return
+                except NoSuchElementException:
+                    time.sleep(1)
+            Common.logger(log_type, crawler).info("切换到小程序失败\n")
+            break
+
+    # 查找元素
+    @classmethod
+    def search_elements(cls, driver: WebDriver, xpath):
+        time.sleep(1)
+        windowHandles = driver.window_handles
+        for handle in windowHandles:
+            driver.switch_to.window(handle)
+            time.sleep(1)
+            try:
+                elements = driver.find_elements(By.XPATH, xpath)
+                if elements:
+                    # cls.find_ad(log_type, crawler, driver)
+                    return elements
+            except NoSuchElementException:
+                pass
+
+    @classmethod
+    def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
+        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{out_video_id}"; """
+        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
+        return len(repeat_video)
+
+    @classmethod
+    def repeat_video_url(cls, log_type, crawler, video_url, env):
+        sql = f""" select * from crawler_video where platform="{cls.platform}" and video_url="{video_url}"; """
+        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
+        return len(repeat_video)
+
+    @classmethod
+    def find_ad(cls, log_type, crawler, driver: WebDriver):
+        windowHandles = driver.window_handles
+        for handle in windowHandles:
+            driver.switch_to.window(handle)
+            time.sleep(1)
+            try:
+                Common.logger(log_type, crawler).info("寻找广告~~~~~~")
+                ad_element = driver.find_element(By.XPATH, '//div[@class="ad-text"]')
+                if ad_element:
+                    Common.logger(log_type, crawler).info("发现广告")
+                    for i in range(20):
+                        if driver.find_element(By.XPATH, '//div[@id="count_down_container"]/*[1]').text == "已完成浏览":
+                            Common.logger(log_type, crawler).info("广告播放完毕,点击返回")
+                            driver.press_keycode(AndroidKey.BACK)
+                            return
+                        else:
+                            Common.logger(log_type, crawler).info("广告未播放完毕,等待 1 秒")
+                            time.sleep(1)
+                else:
+                    Common.logger(log_type, crawler).info("未发现广告, 退出")
+                    return
+            except NoSuchElementException:
+                time.sleep(1)
+
+    @classmethod
+    def get_video_url(cls, log_type, crawler, driver: WebDriver, video_element):
+        video_element.click()
+        time.sleep(5)
+        windowHandles = driver.window_handles
+        for handle in windowHandles:
+            driver.switch_to.window(handle)
+            time.sleep(1)
+            try:
+                video_url_element = driver.find_element(By.XPATH, '//wx-video[@class="videoh"]')
+                video_url = video_url_element.get_attribute("src")
+                cls.find_ad(log_type, crawler, driver)
+                return video_url
+            except NoSuchElementException:
+                time.sleep(1)
+
+    @classmethod
+    def get_videoList(cls, log_type, crawler, driver: WebDriver, env):
+        # 轻触屏幕指定坐标(按屏幕宽高比例计算),用于关闭开屏广告
+        Common.logger(log_type, crawler).info('关闭广告')
+        size = driver.get_window_size()
+        TouchAction(driver).tap(x=int(size['width'] * 0.5), y=int(size['height'] * 0.1)).perform()
+        # 切换到小程序
+        cls.check_to_applet(log_type, crawler, driver)
+
+        time.sleep(5)
+        index = 0
+        while True:
+            try:
+                if cls.search_elements(driver, '//*[@id="scrollContainer"]') is None:
+                    Common.logger(log_type, crawler).info('窗口已销毁\n')
+                    return
+
+                Common.logger(log_type, crawler).info('获取视频列表\n')
+                video_elements = cls.search_elements(driver, '//wx-view[@class="cover"]')
+                if video_elements is None:
+                    Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
+                    return
+
+                video_element_temp = video_elements[index:]
+                if len(video_element_temp) == 0:
+                    Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
+                    return
+
+                for i, video_element in enumerate(video_element_temp):
+                    if video_element is None:
+                        Common.logger(log_type, crawler).info('到底啦~\n')
+                        return
+                    cls.i += 1
+                    cls.search_elements(driver, '//wx-view[@class="cover"]')
+
+                    Common.logger(log_type, crawler).info(f'拖动"视频"列表第{cls.i}个至屏幕中间')
+                    time.sleep(3)
+                    driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})",
+                                          video_element)
+
+                    # video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="playImgs"]')[cls.i-1].text
+                    # cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="coverImg"]')[cls.i-1].get_attribute('src')
+                    # play_cnt = video_element.find_elements(By.XPATH, '//wx-image[@class="coverImg"]/span/*[2]')[cls.i-1].text
+
+                    video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="playImgs"]')[index+i].text
+                    cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="coverImg"]')[index+i].get_attribute('src')
+                    play_cnt = video_element.find_elements(By.XPATH, '//wx-image[@class="coverImg"]/span/*[2]')[index+i].text
+
+                    if "万" in play_cnt:
+                        play_cnt = int(play_cnt.split("万")[0]) * 10000
+                    out_video_id = md5(video_title.encode('utf8')).hexdigest()
+                    video_dict = {
+                        'video_title': video_title,
+                        'video_id': out_video_id,
+                        'play_cnt': play_cnt,
+                        'comment_cnt': 0,
+                        'like_cnt': 0,
+                        'share_cnt': 0,
+                        'publish_time_stamp': int(time.time()),
+                        'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
+                        'user_name': "zhongmiaoyinxin",
+                        'user_id': "zhongmiaoyinxin",
+                        'avatar_url': cover_url,
+                        'cover_url': cover_url,
+                        'session': f"zhongmiaoyinxin-{int(time.time())}"
+                    }
+                    for k, v in video_dict.items():
+                        Common.logger(log_type, crawler).info(f"{k}:{v}")
+
+                    if video_title is None or cover_url is None:
+                        Common.logger(log_type, crawler).info("无效视频\n")
+                    elif any(str(word) if str(word) in video_title else False for word in
+                             cls.zhongmiaoyinxin_config(log_type, crawler, "filter", env)) is True:
+                        Common.logger(log_type, crawler).info('已中过滤词\n')
+                    elif cls.repeat_out_video_id(log_type, crawler, out_video_id, env) != 0:
+                        Common.logger(log_type, crawler).info('视频已下载\n')
+                    else:
+                        video_url = cls.get_video_url(log_type, crawler, driver, video_element)
+
+                        if video_url is None:
+                            Common.logger(log_type, crawler).info("未获取到视频播放地址\n")
+                            driver.press_keycode(AndroidKey.BACK)
+                        elif cls.repeat_video_url(log_type, crawler, video_url, env) != 0:
+                            Common.logger(log_type, crawler).info('视频已下载\n')
+                            driver.press_keycode(AndroidKey.BACK)
+                        else:
+                            video_dict["video_url"] = video_url
+                            Common.logger(log_type, crawler).info(f"video_url:{video_url}\n")
+                            # driver.press_keycode(AndroidKey.BACK)
+                            cls.download_publish(log_type, crawler, video_dict, env, driver)
+                Common.logger(log_type, crawler).info('已抓取完一组视频,休眠10秒\n')
+                time.sleep(10)
+                index = index + len(video_element_temp)
+            except Exception as e:
+                Common.logger(log_type, crawler).info(f"get_videoList:{e}\n")
+                cls.i = 0
+
+    @classmethod
+    def download_publish(cls, log_type, crawler, video_dict, env, driver: WebDriver):
+        # try:
+        # 下载视频
+        Common.download_method(log_type=log_type, crawler=crawler, text='video', title=video_dict['video_title'],
+                               url=video_dict['video_url'])
+        ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
+        if ffmpeg_dict is None:
+            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
+            shutil.rmtree(f"./{crawler}/videos/{md_title}/")
+            Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+            return
+        video_dict["duration"] = ffmpeg_dict["duration"]
+        video_dict["video_width"] = ffmpeg_dict["width"]
+        video_dict["video_height"] = ffmpeg_dict["height"]
+        # 下载封面
+        Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'],
+                               url=video_dict['cover_url'])
+        # 保存视频信息至txt
+        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
+
+        # 上传视频
+        Common.logger(log_type, crawler).info("开始上传视频...")
+        our_video_id = Publish.upload_and_publish(log_type=log_type,
+                                                  crawler=crawler,
+                                                  strategy="推荐榜爬虫策略",
+                                                  our_uid="recommend",
+                                                  env=env,
+                                                  oss_endpoint="out")
+        if env == 'dev':
+            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        else:
+            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        Common.logger(log_type, crawler).info("视频上传完成")
+
+        if our_video_id is None:
+            # 删除视频文件夹
+            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
+            return
+
+        # 视频信息保存至飞书
+        Feishu.insert_columns(log_type, crawler, "19c772", "ROWS", 1, 2)
+        # 视频ID工作表,首行写入数据
+        upload_time = int(time.time())
+        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
+                   "推荐榜爬虫策略",
+                   video_dict["video_title"],
+                   video_dict["video_id"],
+                   video_dict["play_cnt"],
+                   video_dict["duration"],
+                   f'{video_dict["video_width"]}*{video_dict["video_height"]}',
+                   our_video_link,
+                   video_dict["cover_url"],
+                   video_dict["video_url"]]]
+        time.sleep(1)
+        Feishu.update_values(log_type, crawler, "19c772", "F2:V2", values)
+        Common.logger(log_type, crawler).info(f"视频已保存至飞书文档\n")
+
+        rule_dict = {}
+        # 视频信息保存数据库
+        insert_sql = f""" insert into crawler_video(video_id,
+                                                            out_user_id,
+                                                            platform,
+                                                            strategy,
+                                                            out_video_id,
+                                                            video_title,
+                                                            cover_url,
+                                                            video_url,
+                                                            duration,
+                                                            publish_time,
+                                                            play_cnt,
+                                                            crawler_rule,
+                                                            width,
+                                                            height)
+                                                            values({our_video_id},
+                                                            "{video_dict['user_id']}",
+                                                            "{cls.platform}",
+                                                            "推荐榜爬虫策略",
+                                                            "{video_dict['video_id']}",
+                                                            "{video_dict['video_title']}",
+                                                            "{video_dict['cover_url']}",
+                                                            "{video_dict['video_url']}",
+                                                            {int(video_dict['duration'])},
+                                                            "{video_dict['publish_time_str']}",
+                                                            {int(video_dict['play_cnt'])},
+                                                            '{json.dumps(rule_dict)}',
+                                                            {int(video_dict['video_width'])},
+                                                            {int(video_dict['video_height'])}) """
+        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
+        Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
+        driver.press_keycode(AndroidKey.BACK)
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f'download_publish异常:{e}\n')
+
+
+if __name__ == '__main__':
+    # print(ZhongmiaoyinxinRecommend.zhongmiaoyinxin_config('recommend', 'zhongmiaoyinxin', "filter", "dev"))
+    ZhongmiaoyinxinRecommend.start_wechat("recommend", "zhongmiaoyinxin", "dev")
+    pass
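Both recommend crawlers run the same two duplicate checks (repeat_out_video_id and repeat_video_url) before downloading. The wrapper below is a condensed, illustrative summary of those queries and is not part of the commit:

```python
from common.scheduling_db import MysqlHelper  # same helper the crawlers import


def is_duplicate(log_type, crawler, platform, out_video_id, video_url, env) -> bool:
    """True if crawler_video already contains the video, matched by out_video_id or by video_url."""
    id_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}"; """
    url_sql = f""" select * from crawler_video where platform="{platform}" and video_url="{video_url}"; """
    return bool(MysqlHelper.get_values(log_type, crawler, id_sql, env)) \
        or bool(MysqlHelper.get_values(log_type, crawler, url_sql, env))
```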