2 years ago · 50518ea008
--- a/xigua/xigua_search/xigua_search.py
+++ b/xigua/xigua_search/xigua_search.py
@@ -651,6 +651,7 @@ class Search:
 
				                 res = requests.request("GET", url, headers=headers, proxies=Common.tunnel_proxies())
			
 
				                 search_list = res.json()['data']['data']
			
 
				             except Exception as e:
			
 
				+                Common.logger(log_type, crawler).error(f'关键词:{search_word},没有获取到视频列表:offset{offset}')
			
 
				                 search_list = []
			
 
				             if not search_list:
			
 
				                 return
			
@@ -702,7 +703,10 @@ class Search:
 
				                             video_dict = cls.get_video_info(log_type, crawler, item_id)
			
 
				                             if not video_dict:
			
 
				                                 continue
			
 
				-                            if cls.is_ruled(log_type, crawler, video_dict, rule_dict):
			
 
				+                            if not cls.is_ruled(log_type, crawler, video_dict, rule_dict):
			
 
				+                                Common.logger(log_type, crawler).info(f'gid:{item_id},不符合抓取规则\n')
			
 
				+                                continue
			
 
				+                            else:
			
 
				                                 video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
			
 
				                                 video_dict['video_width'] = video_url_dict["video_width"]
			
 
				                                 video_dict['video_height'] = video_url_dict["video_height"]
			
@@ -710,21 +714,20 @@ class Search:
 
				                                 video_dict['video_url'] = video_url_dict["video_url"]
			
 
				                                 video_dict['session'] = signature
			
 
				                                 break
			
 
				-                            else:
			
 
				-                                continue
			
 
				                         except Exception as e:
			
 
				                             Common.logger(log_type, crawler).error(f'视频:{item_id},没有获取到视频详情,原因:{e}')
			
 
				                             continue
			
 
				 
			
 
				                 if not cls.is_ruled(log_type, crawler, video_dict, rule_dict):
			
 
				+                    Common.logger(log_type, crawler).info(f'gid:{item_id},不符合抓取规则\n')
			
 
				+                    continue
			
 
				+                if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
			
 
				+                    Common.logger(log_type, crawler).info(f'gid:{item_id},视频已下载,无需重复下载\n')
			
 
				                     continue
			
 
				                 for k, v in video_dict.items():
			
 
				                     Common.logger(log_type, crawler).info(f"{k}:{v}")
			
 
				-                # print(f'title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
			
 
				                 try:
			
 
				-                    if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
			
 
				-                        Common.logger(log_type, crawler).info(f'gid:{item_id},视频已下载,无需重复下载\n')
			
 
				-                        continue
			
 
				+
			
 
				                     cls.download_publish(
			
 
				                         search_word=search_word,
			
 
				                         log_type=log_type,
			
@@ -738,8 +741,11 @@ class Search:
 
				                         machine=machine
			
 
				                     )
			
 
				                 except Exception as e:
			
 
				+                    Common.logger(log_type, crawler).error(f'视频:{item_id},download_publish异常:{e}\n')
			
 
				                     continue
			
 
				+
			
 
				                 total_count += 1
			
 
				+                # print(f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
			
 
				                 if total_count >= 30:
			
 
				                     return
			
 
				             offset += 10
			
@@ -754,112 +760,109 @@ class Search:
 
				     @classmethod
			
 
				     def download_publish(cls, log_type, crawler, search_word, strategy, video_dict, rule_dict, our_uid, oss_endpoint,
			
 
				                          env, machine):
			
 
				-        try:
			
 
				 
			
 
				-            Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
			
 
				-                                   title=video_dict['video_title'], url=video_dict['video_url'])
			
 
				-            # 下载音频
			
 
				-            Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
			
 
				-                                   title=video_dict['video_title'], url=video_dict['audio_url'])
			
 
				-            # 合成音视频
			
 
				-            Common.video_compose(log_type=log_type, crawler=crawler,
			
 
				-                                 video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
			
 
				-            ffmpeg_dict = Common.ffmpeg(log_type, crawler,
			
 
				-                                        f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
			
 
				-            if ffmpeg_dict is None or ffmpeg_dict['size'] == 0:
			
 
				-                Common.logger(log_type, crawler).warning(f"下载的视频无效，已删除\n")
			
 
				-                # 删除视频文件夹
			
 
				-                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
			
 
				-                return
			
 
				-            # 下载封面
			
 
				-            Common.download_method(log_type=log_type, crawler=crawler, text='cover',
			
 
				-                                   title=video_dict['video_title'], url=video_dict['cover_url'])
			
 
				-            # 保存视频信息至txt
			
 
				-            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
			
 
				-
			
 
				-            # 上传视频
			
 
				-            Common.logger(log_type, crawler).info("开始上传视频...")
			
 
				-            our_video_id = Publish.upload_and_publish(log_type=log_type,
			
 
				-                                                      crawler=crawler,
			
 
				-                                                      strategy=strategy,
			
 
				-                                                      our_uid=our_uid,
			
 
				-                                                      env=env,
			
 
				-                                                      oss_endpoint=oss_endpoint)
			
 
				-            if env == 'dev':
			
 
				-                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
			
 
				-            else:
			
 
				-                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
			
 
				-            Common.logger(log_type, crawler).info("视频上传完成")
			
 
				+        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
			
 
				+                               title=video_dict['video_title'], url=video_dict['video_url'])
			
 
				+        # 下载音频
			
 
				+        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
			
 
				+                               title=video_dict['video_title'], url=video_dict['audio_url'])
			
 
				+        # 合成音视频
			
 
				+        Common.video_compose(log_type=log_type, crawler=crawler,
			
 
				+                             video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
			
 
				+        ffmpeg_dict = Common.ffmpeg(log_type, crawler,
			
 
				+                                    f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
			
 
				+        if ffmpeg_dict is None or ffmpeg_dict['size'] == 0:
			
 
				+            Common.logger(log_type, crawler).warning(f"下载的视频无效，已删除\n")
			
 
				+            # 删除视频文件夹
			
 
				+            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
			
 
				+            return
			
 
				+        # 下载封面
			
 
				+        Common.download_method(log_type=log_type, crawler=crawler, text='cover',
			
 
				+                               title=video_dict['video_title'], url=video_dict['cover_url'])
			
 
				+        # 保存视频信息至txt
			
 
				+        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
			
 
				+
			
 
				+        # 上传视频
			
 
				+        Common.logger(log_type, crawler).info("开始上传视频...")
			
 
				+        our_video_id = Publish.upload_and_publish(log_type=log_type,
			
 
				+                                                  crawler=crawler,
			
 
				+                                                  strategy=strategy,
			
 
				+                                                  our_uid=our_uid,
			
 
				+                                                  env=env,
			
 
				+                                                  oss_endpoint=oss_endpoint)
			
 
				+        if env == 'dev':
			
 
				+            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
			
 
				+        else:
			
 
				+            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
			
 
				+        Common.logger(log_type, crawler).info("视频上传完成")
			
 
				 
			
 
				-            if our_video_id is None:
			
 
				-                # 删除视频文件夹
			
 
				-                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
			
 
				-                return
			
 
				+        if our_video_id is None:
			
 
				+            # 删除视频文件夹
			
 
				+            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
			
 
				+            return
			
 
				 
			
 
				-            # 视频写入飞书
			
 
				-            Feishu.insert_columns(log_type, 'xigua', "BUNvGC", "ROWS", 1, 2)
			
 
				-            upload_time = int(time.time())
			
 
				-            values = [[
			
 
				-                search_word,
			
 
				-                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
			
 
				-                "关键词搜索",
			
 
				-                video_dict['video_title'],
			
 
				-                str(video_dict['video_id']),
			
 
				-                our_video_link,
			
 
				-                video_dict['gid'],
			
 
				-                video_dict['play_cnt'],
			
 
				-                video_dict['comment_cnt'],
			
 
				-                video_dict['like_cnt'],
			
 
				-                video_dict['share_cnt'],
			
 
				-                video_dict['duration'],
			
 
				-                str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
			
 
				-                video_dict['publish_time_str'],
			
 
				-                video_dict['user_name'],
			
 
				-                video_dict['user_id'],
			
 
				-                video_dict['avatar_url'],
			
 
				-                video_dict['cover_url'],
			
 
				-                video_dict['video_url'],
			
 
				-                video_dict['audio_url']]]
			
 
				-            time.sleep(1)
			
 
				-            Feishu.update_values(log_type, 'xigua', "BUNvGC", "E2:Z2", values)
			
 
				-            Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
			
 
				-
			
 
				-            # 视频信息保存数据库
			
 
				-            insert_sql = f""" insert into crawler_video(video_id,
			
 
				-                                    user_id,
			
 
				-                                    out_user_id,
			
 
				-                                    platform,
			
 
				-                                    strategy,
			
 
				-                                    out_video_id,
			
 
				-                                    video_title,
			
 
				-                                    cover_url,
			
 
				-                                    video_url,
			
 
				-                                    duration,
			
 
				-                                    publish_time,
			
 
				-                                    play_cnt,
			
 
				-                                    crawler_rule,
			
 
				-                                    width,
			
 
				-                                    height)
			
 
				-                                    values({our_video_id},
			
 
				-                                    {our_uid},
			
 
				-                                    "{video_dict['user_id']}",
			
 
				-                                    "{cls.platform}",
			
 
				-                                    "定向爬虫策略",
			
 
				-                                    "{video_dict['video_id']}",
			
 
				-                                    "{video_dict['video_title']}",
			
 
				-                                    "{video_dict['cover_url']}",
			
 
				-                                    "{video_dict['video_url']}",
			
 
				-                                    {int(video_dict['duration'])},
			
 
				-                                    "{video_dict['publish_time_str']}",
			
 
				-                                    {int(video_dict['play_cnt'])},
			
 
				-                                    '{json.dumps(rule_dict)}',
			
 
				-                                    {int(video_dict['video_width'])},
			
 
				-                                    {int(video_dict['video_height'])}) """
			
 
				-            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
			
 
				-            MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
			
 
				-            Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
			
 
				-        except Exception as e:
			
 
				-            Common.logger(log_type, crawler).error(f'download_publish异常:{e}\n')
			
 
				+        # 视频写入飞书
			
 
				+        Feishu.insert_columns(log_type, 'xigua', "BUNvGC", "ROWS", 1, 2)
			
 
				+        upload_time = int(time.time())
			
 
				+        values = [[
			
 
				+            search_word,
			
 
				+            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
			
 
				+            "关键词搜索",
			
 
				+            video_dict['video_title'],
			
 
				+            str(video_dict['video_id']),
			
 
				+            our_video_link,
			
 
				+            video_dict['gid'],
			
 
				+            video_dict['play_cnt'],
			
 
				+            video_dict['comment_cnt'],
			
 
				+            video_dict['like_cnt'],
			
 
				+            video_dict['share_cnt'],
			
 
				+            video_dict['duration'],
			
 
				+            str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
			
 
				+            video_dict['publish_time_str'],
			
 
				+            video_dict['user_name'],
			
 
				+            video_dict['user_id'],
			
 
				+            video_dict['avatar_url'],
			
 
				+            video_dict['cover_url'],
			
 
				+            video_dict['video_url'],
			
 
				+            video_dict['audio_url']]]
			
 
				+        time.sleep(1)
			
 
				+        Feishu.update_values(log_type, 'xigua', "BUNvGC", "E2:Z2", values)
			
 
				+        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
			
 
				+
			
 
				+        # 视频信息保存数据库
			
 
				+        insert_sql = f""" insert into crawler_video(video_id,
			
 
				+                                user_id,
			
 
				+                                out_user_id,
			
 
				+                                platform,
			
 
				+                                strategy,
			
 
				+                                out_video_id,
			
 
				+                                video_title,
			
 
				+                                cover_url,
			
 
				+                                video_url,
			
 
				+                                duration,
			
 
				+                                publish_time,
			
 
				+                                play_cnt,
			
 
				+                                crawler_rule,
			
 
				+                                width,
			
 
				+                                height)
			
 
				+                                values({our_video_id},
			
 
				+                                {our_uid},
			
 
				+                                "{video_dict['user_id']}",
			
 
				+                                "{cls.platform}",
			
 
				+                                "定向爬虫策略",
			
 
				+                                "{video_dict['video_id']}",
			
 
				+                                "{video_dict['video_title']}",
			
 
				+                                "{video_dict['cover_url']}",
			
 
				+                                "{video_dict['video_url']}",
			
 
				+                                {int(video_dict['duration'])},
			
 
				+                                "{video_dict['publish_time_str']}",
			
 
				+                                {int(video_dict['play_cnt'])},
			
 
				+                                '{json.dumps(rule_dict)}',
			
 
				+                                {int(video_dict['video_width'])},
			
 
				+                                {int(video_dict['video_height'])}) """
			
 
				+        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
			
 
				+        MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
			
 
				+        Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
			
 
				 
			
 
				     @classmethod
			
 
				     def get_search_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):