lierqiang 2 years ago
parent
commit
50518ea008
1 changed file with 113 additions and 110 deletions
  1. 113 110
      xigua/xigua_search/xigua_search.py

+ 113 - 110
xigua/xigua_search/xigua_search.py

@@ -651,6 +651,7 @@ class Search:
                 res = requests.request("GET", url, headers=headers, proxies=Common.tunnel_proxies())
                 search_list = res.json()['data']['data']
             except Exception as e:
+                Common.logger(log_type, crawler).error(f'关键词:{search_word},没有获取到视频列表:offset{offset}')
                 search_list = []
             if not search_list:
                 return
@@ -702,7 +703,10 @@ class Search:
                             video_dict = cls.get_video_info(log_type, crawler, item_id)
                             if not video_dict:
                                 continue
-                            if cls.is_ruled(log_type, crawler, video_dict, rule_dict):
+                            if not cls.is_ruled(log_type, crawler, video_dict, rule_dict):
+                                Common.logger(log_type, crawler).info(f'gid:{item_id},不符合抓取规则\n')
+                                continue
+                            else:
                                 video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
                                 video_dict['video_width'] = video_url_dict["video_width"]
                                 video_dict['video_height'] = video_url_dict["video_height"]
@@ -710,21 +714,20 @@ class Search:
                                 video_dict['video_url'] = video_url_dict["video_url"]
                                 video_dict['session'] = signature
                                 break
-                            else:
-                                continue
                         except Exception as e:
                             Common.logger(log_type, crawler).error(f'视频:{item_id},没有获取到视频详情,原因:{e}')
                             continue
 
                 if not cls.is_ruled(log_type, crawler, video_dict, rule_dict):
+                    Common.logger(log_type, crawler).info(f'gid:{item_id},不符合抓取规则\n')
+                    continue
+                if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
+                    Common.logger(log_type, crawler).info(f'gid:{item_id},视频已下载,无需重复下载\n')
                     continue
                 for k, v in video_dict.items():
                     Common.logger(log_type, crawler).info(f"{k}:{v}")
-                # print(f'title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
                 try:
-                    if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
-                        Common.logger(log_type, crawler).info(f'gid:{item_id},视频已下载,无需重复下载\n')
-                        continue
+
                     cls.download_publish(
                         search_word=search_word,
                         log_type=log_type,
@@ -738,8 +741,11 @@ class Search:
                         machine=machine
                     )
                 except Exception as e:
+                    Common.logger(log_type, crawler).error(f'视频:{item_id},download_publish异常:{e}\n')
                     continue
+
                 total_count += 1
+                # print(f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
                 if total_count >= 30:
                     return
             offset += 10
@@ -754,112 +760,109 @@ class Search:
     @classmethod
     def download_publish(cls, log_type, crawler, search_word, strategy, video_dict, rule_dict, our_uid, oss_endpoint,
                          env, machine):
-        try:
 
-            Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
-                                   title=video_dict['video_title'], url=video_dict['video_url'])
-            # 下载音频
-            Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
-                                   title=video_dict['video_title'], url=video_dict['audio_url'])
-            # 合成音视频
-            Common.video_compose(log_type=log_type, crawler=crawler,
-                                 video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
-            ffmpeg_dict = Common.ffmpeg(log_type, crawler,
-                                        f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
-            if ffmpeg_dict is None or ffmpeg_dict['size'] == 0:
-                Common.logger(log_type, crawler).warning(f"下载的视频无效,已删除\n")
-                # 删除视频文件夹
-                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-                return
-            # 下载封面
-            Common.download_method(log_type=log_type, crawler=crawler, text='cover',
-                                   title=video_dict['video_title'], url=video_dict['cover_url'])
-            # 保存视频信息至txt
-            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
-
-            # 上传视频
-            Common.logger(log_type, crawler).info("开始上传视频...")
-            our_video_id = Publish.upload_and_publish(log_type=log_type,
-                                                      crawler=crawler,
-                                                      strategy=strategy,
-                                                      our_uid=our_uid,
-                                                      env=env,
-                                                      oss_endpoint=oss_endpoint)
-            if env == 'dev':
-                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-            else:
-                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
-            Common.logger(log_type, crawler).info("视频上传完成")
+        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
+                               title=video_dict['video_title'], url=video_dict['video_url'])
+        # 下载音频
+        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
+                               title=video_dict['video_title'], url=video_dict['audio_url'])
+        # 合成音视频
+        Common.video_compose(log_type=log_type, crawler=crawler,
+                             video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
+        ffmpeg_dict = Common.ffmpeg(log_type, crawler,
+                                    f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
+        if ffmpeg_dict is None or ffmpeg_dict['size'] == 0:
+            Common.logger(log_type, crawler).warning(f"下载的视频无效,已删除\n")
+            # 删除视频文件夹
+            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
+            return
+        # 下载封面
+        Common.download_method(log_type=log_type, crawler=crawler, text='cover',
+                               title=video_dict['video_title'], url=video_dict['cover_url'])
+        # 保存视频信息至txt
+        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
+
+        # 上传视频
+        Common.logger(log_type, crawler).info("开始上传视频...")
+        our_video_id = Publish.upload_and_publish(log_type=log_type,
+                                                  crawler=crawler,
+                                                  strategy=strategy,
+                                                  our_uid=our_uid,
+                                                  env=env,
+                                                  oss_endpoint=oss_endpoint)
+        if env == 'dev':
+            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        else:
+            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+        Common.logger(log_type, crawler).info("视频上传完成")
 
-            if our_video_id is None:
-                # 删除视频文件夹
-                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-                return
+        if our_video_id is None:
+            # 删除视频文件夹
+            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
+            return
 
-            # 视频写入飞书
-            Feishu.insert_columns(log_type, 'xigua', "BUNvGC", "ROWS", 1, 2)
-            upload_time = int(time.time())
-            values = [[
-                search_word,
-                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
-                "关键词搜索",
-                video_dict['video_title'],
-                str(video_dict['video_id']),
-                our_video_link,
-                video_dict['gid'],
-                video_dict['play_cnt'],
-                video_dict['comment_cnt'],
-                video_dict['like_cnt'],
-                video_dict['share_cnt'],
-                video_dict['duration'],
-                str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
-                video_dict['publish_time_str'],
-                video_dict['user_name'],
-                video_dict['user_id'],
-                video_dict['avatar_url'],
-                video_dict['cover_url'],
-                video_dict['video_url'],
-                video_dict['audio_url']]]
-            time.sleep(1)
-            Feishu.update_values(log_type, 'xigua', "BUNvGC", "E2:Z2", values)
-            Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
-
-            # 视频信息保存数据库
-            insert_sql = f""" insert into crawler_video(video_id,
-                                    user_id,
-                                    out_user_id,
-                                    platform,
-                                    strategy,
-                                    out_video_id,
-                                    video_title,
-                                    cover_url,
-                                    video_url,
-                                    duration,
-                                    publish_time,
-                                    play_cnt,
-                                    crawler_rule,
-                                    width,
-                                    height)
-                                    values({our_video_id},
-                                    {our_uid},
-                                    "{video_dict['user_id']}",
-                                    "{cls.platform}",
-                                    "定向爬虫策略",
-                                    "{video_dict['video_id']}",
-                                    "{video_dict['video_title']}",
-                                    "{video_dict['cover_url']}",
-                                    "{video_dict['video_url']}",
-                                    {int(video_dict['duration'])},
-                                    "{video_dict['publish_time_str']}",
-                                    {int(video_dict['play_cnt'])},
-                                    '{json.dumps(rule_dict)}',
-                                    {int(video_dict['video_width'])},
-                                    {int(video_dict['video_height'])}) """
-            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-            MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
-            Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'download_publish异常:{e}\n')
+        # 视频写入飞书
+        Feishu.insert_columns(log_type, 'xigua', "BUNvGC", "ROWS", 1, 2)
+        upload_time = int(time.time())
+        values = [[
+            search_word,
+            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
+            "关键词搜索",
+            video_dict['video_title'],
+            str(video_dict['video_id']),
+            our_video_link,
+            video_dict['gid'],
+            video_dict['play_cnt'],
+            video_dict['comment_cnt'],
+            video_dict['like_cnt'],
+            video_dict['share_cnt'],
+            video_dict['duration'],
+            str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
+            video_dict['publish_time_str'],
+            video_dict['user_name'],
+            video_dict['user_id'],
+            video_dict['avatar_url'],
+            video_dict['cover_url'],
+            video_dict['video_url'],
+            video_dict['audio_url']]]
+        time.sleep(1)
+        Feishu.update_values(log_type, 'xigua', "BUNvGC", "E2:Z2", values)
+        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
+
+        # 视频信息保存数据库
+        insert_sql = f""" insert into crawler_video(video_id,
+                                user_id,
+                                out_user_id,
+                                platform,
+                                strategy,
+                                out_video_id,
+                                video_title,
+                                cover_url,
+                                video_url,
+                                duration,
+                                publish_time,
+                                play_cnt,
+                                crawler_rule,
+                                width,
+                                height)
+                                values({our_video_id},
+                                {our_uid},
+                                "{video_dict['user_id']}",
+                                "{cls.platform}",
+                                "定向爬虫策略",
+                                "{video_dict['video_id']}",
+                                "{video_dict['video_title']}",
+                                "{video_dict['cover_url']}",
+                                "{video_dict['video_url']}",
+                                {int(video_dict['duration'])},
+                                "{video_dict['publish_time_str']}",
+                                {int(video_dict['play_cnt'])},
+                                '{json.dumps(rule_dict)}',
+                                {int(video_dict['video_width'])},
+                                {int(video_dict['video_height'])}) """
+        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
+        Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
 
     @classmethod
     def get_search_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):